author     Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-05-20 16:37:59 +0000
committer  Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-05-20 16:37:59 +0000
commit     f280dc28ba4fd713d8b92243fc97dd32b79bd902 (patch)
tree       aaa7cb313ca956a5e7b01f65223311730f0c5388
parent     1db05b5d41d80b78de1acafa6f061af6dac689f2 (diff)
parent     b1b8c8771490b286182357d1f2f8418a47e3297c (diff)
download   icing-f280dc28ba4fd713d8b92243fc97dd32b79bd902.tar.gz

Snap for 11861033 from b1b8c8771490b286182357d1f2f8418a47e3297c to androidx-concurrent-release
Change-Id: Ib053f36b0fd3bf7585788b84dfc6cc3314011d59
Diffstat (all file modes -rw-r--r--):
.gitignore | 1
Android.bp | 2
AndroidManifest.xml | 2
CMakeLists.txt | 24
OWNERS | 2
build.gradle | 73
icing/absl_ports/annotate.cc | 2
icing/absl_ports/arraysize_macros.h | 40
icing/absl_ports/ascii_str_to_lower.cc (renamed from icing/absl_ports/status_imports.h) | 17
icing/absl_ports/ascii_str_to_lower.h | 38
icing/absl_ports/status_test.cc | 53
icing/absl_ports/str_cat.cc | 5
icing/absl_ports/str_join.cc | 41
icing/absl_ports/str_join.h | 11
icing/document-builder.h | 138
icing/file/destructible-directory.h | 74
icing/file/destructible-directory_test.cc | 118
icing/file/destructible-file.h | 72
icing/file/destructible-file_test.cc | 117
icing/file/file-backed-bitmap.cc | 24
icing/file/file-backed-bitmap.h | 5
icing/file/file-backed-proto-log.h | 478
icing/file/file-backed-proto-log_benchmark.cc | 169
icing/file/file-backed-proto-log_test.cc | 420
icing/file/file-backed-proto.h | 59
icing/file/file-backed-proto_test.cc | 16
icing/file/file-backed-vector.h | 695
icing/file/file-backed-vector_benchmark.cc | 158
icing/file/file-backed-vector_test.cc | 1020
icing/file/filesystem.cc | 169
icing/file/filesystem.h | 20
icing/file/filesystem_test.cc | 43
icing/file/memory-mapped-file-leak_test.cc | 72
icing/file/memory-mapped-file.cc | 354
icing/file/memory-mapped-file.h | 282
icing/file/memory-mapped-file_test.cc | 668
icing/file/mock-filesystem.h | 16
icing/file/persistent-hash-map.cc | 750
icing/file/persistent-hash-map.h | 529
icing/file/persistent-hash-map_test.cc | 1577
icing/file/persistent-storage.cc | 55
icing/file/persistent-storage.h | 369
icing/file/portable-file-backed-proto-log.h | 1263
icing/file/portable-file-backed-proto-log_benchmark.cc | 343
icing/file/portable-file-backed-proto-log_test.cc | 1265
icing/file/posting_list/flash-index-storage-header.h | 122
icing/file/posting_list/flash-index-storage.cc | 661
icing/file/posting_list/flash-index-storage.h | 381
icing/file/posting_list/flash-index-storage_test.cc | 610
icing/file/posting_list/index-block.cc | 333
icing/file/posting_list/index-block.h | 369
icing/file/posting_list/index-block_test.cc | 357
icing/file/posting_list/posting-list-accessor.cc | 136
icing/file/posting_list/posting-list-accessor.h | 118
icing/file/posting_list/posting-list-common.h | 33
icing/file/posting_list/posting-list-free.h (renamed from icing/index/posting-list-free.h) | 58
icing/file/posting_list/posting-list-free_test.cc (renamed from icing/index/posting-list-free_test.cc) | 86
icing/file/posting_list/posting-list-identifier.cc | 27
icing/file/posting_list/posting-list-identifier.h | 120
icing/file/posting_list/posting-list-used.cc | 58
icing/file/posting_list/posting-list-used.h | 174
icing/file/posting_list/posting-list-utils.cc (renamed from icing/index/posting-list-utils.cc) | 23
icing/file/posting_list/posting-list-utils.h (renamed from icing/index/posting-list-utils.h) | 21
icing/file/version-util.cc | 150
icing/file/version-util.h | 115
icing/file/version-util_test.cc | 484
icing/icing-search-engine-test-jni-layer.cc (renamed from icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc) | 5
icing/icing-search-engine-with-icu-file_test.cc | 46
icing/icing-search-engine.cc | 2360
icing/icing-search-engine.h | 380
icing/icing-search-engine_backwards_compatibility_test.cc | 569
icing/icing-search-engine_benchmark.cc | 1027
icing/icing-search-engine_delete_test.cc | 768
icing/icing-search-engine_flush_benchmark.cc | 199
icing/icing-search-engine_fuzz_test.cc | 30
icing/icing-search-engine_initialization_test.cc | 6030
icing/icing-search-engine_optimize_test.cc | 1855
icing/icing-search-engine_put_test.cc | 481
icing/icing-search-engine_schema_test.cc | 3159
icing/icing-search-engine_search_test.cc | 7173
icing/icing-search-engine_suggest_test.cc | 1601
icing/icing-search-engine_test.cc | 3936
icing/index/data-indexing-handler.h | 69
icing/index/hit/doc-hit-info.cc | 62
icing/index/hit/doc-hit-info.h | 51
icing/index/hit/doc-hit-info_test.cc | 134
icing/index/hit/hit.cc | 65
icing/index/hit/hit.h | 116
icing/index/hit/hit_test.cc | 151
icing/index/index-processor.cc | 96
icing/index/index-processor.h | 91
icing/index/index-processor_benchmark.cc | 315
icing/index/index-processor_test.cc | 1516
icing/index/index.cc | 293
icing/index/index.h | 206
icing/index/index_test.cc | 2563
icing/index/integer-section-indexing-handler.cc | 112
icing/index/integer-section-indexing-handler.h | 71
icing/index/integer-section-indexing-handler_test.cc | 601
icing/index/iterator/doc-hit-info-iterator-all-document-id.cc | 8
icing/index/iterator/doc-hit-info-iterator-all-document-id.h | 14
icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc | 55
icing/index/iterator/doc-hit-info-iterator-and.cc | 76
icing/index/iterator/doc-hit-info-iterator-and.h | 48
icing/index/iterator/doc-hit-info-iterator-and_test.cc | 451
icing/index/iterator/doc-hit-info-iterator-filter.cc | 112
icing/index/iterator/doc-hit-info-iterator-filter.h | 25
icing/index/iterator/doc-hit-info-iterator-filter_test.cc | 559
icing/index/iterator/doc-hit-info-iterator-none.h | 52
icing/index/iterator/doc-hit-info-iterator-not.cc | 56
icing/index/iterator/doc-hit-info-iterator-not.h | 18
icing/index/iterator/doc-hit-info-iterator-not_test.cc | 56
icing/index/iterator/doc-hit-info-iterator-or.cc | 87
icing/index/iterator/doc-hit-info-iterator-or.h | 63
icing/index/iterator/doc-hit-info-iterator-or_test.cc | 426
icing/index/iterator/doc-hit-info-iterator-property-in-document.cc | 65
icing/index/iterator/doc-hit-info-iterator-property-in-document.h | 73
icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc | 103
icing/index/iterator/doc-hit-info-iterator-property-in-schema.h | 80
icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc | 269
icing/index/iterator/doc-hit-info-iterator-section-restrict.cc | 245
icing/index/iterator/doc-hit-info-iterator-section-restrict.h | 63
icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc | 452
icing/index/iterator/doc-hit-info-iterator-term.cc | 125
icing/index/iterator/doc-hit-info-iterator-term.h | 108
icing/index/iterator/doc-hit-info-iterator-test-util.h | 131
icing/index/iterator/doc-hit-info-iterator.h | 221
icing/index/iterator/doc-hit-info-iterator_benchmark.cc | 10
icing/index/iterator/section-restrict-data.cc | 82
icing/index/iterator/section-restrict-data.h | 98
icing/index/lite-index.cc | 457
icing/index/lite-index.h | 269
icing/index/lite/doc-hit-info-iterator-term-lite.cc | 217
icing/index/lite/doc-hit-info-iterator-term-lite.h | 173
icing/index/lite/lite-index-header.h (renamed from icing/legacy/index/icing-lite-index-header.h) | 37
icing/index/lite/lite-index-options.cc (renamed from icing/legacy/index/icing-lite-index-options.cc) | 34
icing/index/lite/lite-index-options.h (renamed from icing/legacy/index/icing-lite-index-options.h) | 20
icing/index/lite/lite-index.cc | 716
icing/index/lite/lite-index.h | 444
icing/index/lite/lite-index_test.cc | 741
icing/index/lite/lite-index_thread-safety_test.cc | 399
icing/index/lite/term-id-hit-pair.h | 85
icing/index/main/doc-hit-info-iterator-term-main.cc | 218
icing/index/main/doc-hit-info-iterator-term-main.h | 204
icing/index/main/main-index-merger.cc | 305
icing/index/main/main-index-merger.h | 49
icing/index/main/main-index-merger_test.cc | 382
icing/index/main/main-index.cc | 858
icing/index/main/main-index.h | 350
icing/index/main/main-index_test.cc | 710
icing/index/main/posting-list-hit-accessor.cc | 123
icing/index/main/posting-list-hit-accessor.h | 101
icing/index/main/posting-list-hit-accessor_test.cc | 366
icing/index/main/posting-list-hit-serializer.cc | 714
icing/index/main/posting-list-hit-serializer.h | 345
icing/index/main/posting-list-hit-serializer_test.cc | 731
icing/index/numeric/doc-hit-info-iterator-numeric.h | 85
icing/index/numeric/dummy-numeric-index.h | 351
icing/index/numeric/integer-index-bucket-util.cc | 205
icing/index/numeric/integer-index-bucket-util.h | 81
icing/index/numeric/integer-index-bucket-util_test.cc | 1112
icing/index/numeric/integer-index-data.h | 59
icing/index/numeric/integer-index-storage.cc | 1180
icing/index/numeric/integer-index-storage.h | 506
icing/index/numeric/integer-index-storage_benchmark.cc | 407
icing/index/numeric/integer-index-storage_test.cc | 2161
icing/index/numeric/integer-index.cc | 651
icing/index/numeric/integer-index.h | 409
icing/index/numeric/integer-index_test.cc | 2598
icing/index/numeric/numeric-index.h | 204
icing/index/numeric/posting-list-integer-index-accessor.cc | 164
icing/index/numeric/posting-list-integer-index-accessor.h | 130
icing/index/numeric/posting-list-integer-index-accessor_test.cc | 535
icing/index/numeric/posting-list-integer-index-serializer.cc | 512
icing/index/numeric/posting-list-integer-index-serializer.h | 338
icing/index/numeric/posting-list-integer-index-serializer_test.cc | 491
icing/index/posting-list-used.cc | 613
icing/index/posting-list-used.h | 321
icing/index/posting-list-used_test.cc | 537
icing/index/property-existence-indexing-handler.cc | 127
icing/index/property-existence-indexing-handler.h | 86
icing/index/property-existence-indexing-handler_test.cc | 524
icing/index/string-section-indexing-handler.cc | 114
icing/index/string-section-indexing-handler.h | 77
icing/index/term-indexing-handler.cc | 146
icing/index/term-indexing-handler.h | 97
icing/index/term-indexing-handler_test.cc | 664
icing/index/term-metadata.h | 11
icing/jni.lds | 9
icing/jni/icing-search-engine-jni.cc | 496
icing/jni/jni-cache.cc | 7
icing/jni/jni-cache.h | 12
icing/jni/scoped-primitive-array-critical.h | 86
icing/jni/scoped-primitive-array-critical_test.cc | 140
icing/jni/scoped-utf-chars.h | 81
icing/jni/scoped-utf-chars_test.cc | 126
icing/join/aggregation-scorer.cc | 139
icing/join/aggregation-scorer.h | 41
icing/join/aggregation-scorer_test.cc | 215
icing/join/doc-join-info.cc | 49
icing/join/doc-join-info.h | 66
icing/join/doc-join-info_test.cc | 96
icing/join/document-id-to-join-info.h | 67
icing/join/join-children-fetcher.cc | 39
icing/join/join-children-fetcher.h | 73
icing/join/join-children-fetcher_test.cc | 83
icing/join/join-processor.cc | 270
icing/join/join-processor.h | 88
icing/join/join-processor_test.cc | 930
icing/join/posting-list-join-data-accessor.h | 211
icing/join/posting-list-join-data-accessor_test.cc | 435
icing/join/posting-list-join-data-serializer.h | 803
icing/join/posting-list-join-data-serializer_test.cc | 653
icing/join/qualified-id-join-index-impl-v1.cc | 476
icing/join/qualified-id-join-index-impl-v1.h | 327
icing/join/qualified-id-join-index-impl-v1_test.cc | 931
icing/join/qualified-id-join-index-impl-v2.cc | 681
icing/join/qualified-id-join-index-impl-v2.h | 369
icing/join/qualified-id-join-index-impl-v2_test.cc | 1414
icing/join/qualified-id-join-index.h | 187
icing/join/qualified-id-join-indexing-handler-v1_test.cc | 558
icing/join/qualified-id-join-indexing-handler.cc | 179
icing/join/qualified-id-join-indexing-handler.h | 78
icing/join/qualified-id-join-indexing-handler_test.cc | 829
icing/join/qualified-id.cc | 110
icing/join/qualified-id.h | 65
icing/join/qualified-id_test.cc | 159
icing/legacy/core/icing-core-types.h | 3
icing/legacy/core/icing-string-util.cc | 17
icing/legacy/core/icing-string-util.h | 9
icing/legacy/core/icing-timer.h | 3
icing/legacy/index/icing-array-storage.cc | 32
icing/legacy/index/icing-array-storage.h | 3
icing/legacy/index/icing-bit-util.h | 5
icing/legacy/index/icing-common-types.h | 129
icing/legacy/index/icing-dynamic-trie.cc | 560
icing/legacy/index/icing-dynamic-trie.h | 109
icing/legacy/index/icing-dynamic-trie_test.cc | 1450
icing/legacy/index/icing-filesystem.cc | 89
icing/legacy/index/icing-filesystem.h | 13
icing/legacy/index/icing-flash-bitmap.cc | 25
icing/legacy/index/icing-flash-bitmap.h | 4
icing/legacy/index/icing-mmapper.cc | 11
icing/legacy/index/icing-mmapper.h | 4
icing/legacy/index/icing-mock-filesystem.h | 225
icing/legacy/index/icing-storage-file.cc | 14
icing/legacy/index/icing-storage.h | 1
icing/monkey_test/icing-monkey-test-runner.cc | 525
icing/monkey_test/icing-monkey-test-runner.h | 79
icing/monkey_test/icing-search-engine_monkey_test.cc | 99
icing/monkey_test/in-memory-icing-search-engine.cc | 352
icing/monkey_test/in-memory-icing-search-engine.h | 167
icing/monkey_test/monkey-test-common-words.h | 284
icing/monkey_test/monkey-test-generators.cc | 346
icing/monkey_test/monkey-test-generators.h | 127
icing/monkey_test/monkey-test-util.h | 68
icing/monkey_test/monkey-tokenized-document.h | 38
icing/performance-configuration.cc | 41
icing/performance-configuration.h | 19
icing/portable/endian.h | 208
icing/portable/equals-proto.h | 2
icing/portable/gzip_stream.cc | 313
icing/portable/gzip_stream.h | 177
icing/portable/platform.h | 106
icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h | 108
icing/query/advanced_query_parser/abstract-syntax-tree.h | 184
icing/query/advanced_query_parser/abstract-syntax-tree_test.cc | 143
icing/query/advanced_query_parser/function.cc | 77
icing/query/advanced_query_parser/function.h | 66
icing/query/advanced_query_parser/function_test.cc | 332
icing/query/advanced_query_parser/lexer.cc | 270
icing/query/advanced_query_parser/lexer.h | 169
icing/query/advanced_query_parser/lexer_fuzz_test.cc | 37
icing/query/advanced_query_parser/lexer_test.cc | 698
icing/query/advanced_query_parser/param.h | 57
icing/query/advanced_query_parser/parser.cc | 449
icing/query/advanced_query_parser/parser.h | 141
icing/query/advanced_query_parser/parser_integration_test.cc | 1012
icing/query/advanced_query_parser/parser_test.cc | 1087
icing/query/advanced_query_parser/pending-value.cc | 44
icing/query/advanced_query_parser/pending-value.h | 160
icing/query/advanced_query_parser/query-visitor.cc | 963
icing/query/advanced_query_parser/query-visitor.h | 327
icing/query/advanced_query_parser/query-visitor_test.cc | 4112
icing/query/advanced_query_parser/util/string-util.cc | 106
icing/query/advanced_query_parser/util/string-util.h | 49
icing/query/advanced_query_parser/util/string-util_test.cc | 125
icing/query/query-features.h | 63
icing/query/query-processor.cc | 169
icing/query/query-processor.h | 47
icing/query/query-processor_benchmark.cc | 227
icing/query/query-processor_test.cc | 2897
icing/query/query-results.h | 46
icing/query/query-terms.h | 10
icing/query/query-utils.cc | 42
icing/query/query-utils.h | 30
icing/query/suggestion-processor.cc | 311
icing/query/suggestion-processor.h | 78
icing/query/suggestion-processor_test.cc | 722
icing/result/page-result-state.h | 15
icing/result/page-result.h | 46
icing/result/projection-tree.cc | 50
icing/result/projection-tree.h | 61
icing/result/projection-tree_test.cc | 118
icing/result/projector.cc | 62
icing/result/projector.h | 36
icing/result/result-adjustment-info.cc | 64
icing/result/result-adjustment-info.h | 53
icing/result/result-adjustment-info_test.cc | 198
icing/result/result-retriever-v2.cc | 268
icing/result/result-retriever-v2.h | 111
icing/result/result-retriever-v2_group-result-limiter_test.cc | 1163
icing/result/result-retriever-v2_projection_test.cc | 1957
icing/result/result-retriever-v2_snippet_test.cc | 1162
icing/result/result-retriever-v2_test.cc | 1012
icing/result/result-retriever.cc | 96
icing/result/result-retriever.h | 95
icing/result/result-retriever_test.cc | 586
icing/result/result-state-manager.cc | 223
icing/result/result-state-manager.h | 113
icing/result/result-state-manager_test.cc | 1755
icing/result/result-state-manager_thread-safety_test.cc | 458
icing/result/result-state-v2.cc | 84
icing/result/result-state-v2.h | 175
icing/result/result-state-v2_test.cc | 409
icing/result/result-state.cc | 70
icing/result/result-state.h | 81
icing/result/result-state_test.cc | 214
icing/result/snippet-retriever-test-jni-layer.cc | 36
icing/result/snippet-retriever.cc | 630
icing/result/snippet-retriever_benchmark.cc | 333
icing/result/snippet-retriever_test.cc | 1711
icing/schema-builder.h | 227
icing/schema/backup-schema-producer.cc | 164
icing/schema/backup-schema-producer.h | 55
icing/schema/backup-schema-producer_test.cc | 737
icing/schema/joinable-property-manager-builder_test.cc | 446
icing/schema/joinable-property-manager.cc | 203
icing/schema/joinable-property-manager.h | 160
icing/schema/joinable-property-manager_test.cc | 519
icing/schema/joinable-property.h | 132
icing/schema/property-util.cc | 137
icing/schema/property-util.h | 212
icing/schema/property-util_test.cc | 253
icing/schema/schema-property-iterator.cc | 198
icing/schema/schema-property-iterator.h | 222
icing/schema/schema-property-iterator_test.cc | 3905
icing/schema/schema-store.cc | 824
icing/schema/schema-store.h | 406
icing/schema/schema-store_test.cc | 2919
icing/schema/schema-type-manager.cc | 108
icing/schema/schema-type-manager.h | 79
icing/schema/schema-type-manager_test.cc | 356
icing/schema/schema-util.cc | 974
icing/schema/schema-util.h | 274
icing/schema/schema-util_test.cc | 5468
icing/schema/section-manager-builder_test.cc | 341
icing/schema/section-manager.cc | 391
icing/schema/section-manager.h | 109
icing/schema/section-manager_test.cc | 1244
icing/schema/section.h | 92
icing/scoring/advanced_scoring/advanced-scorer.cc | 68
icing/scoring/advanced_scoring/advanced-scorer.h | 92
icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc | 70
icing/scoring/advanced_scoring/advanced-scorer_test.cc | 1039
icing/scoring/advanced_scoring/score-expression.cc | 521
icing/scoring/advanced_scoring/score-expression.h | 348
icing/scoring/advanced_scoring/score-expression_test.cc | 353
icing/scoring/advanced_scoring/scoring-visitor.cc | 191
icing/scoring/advanced_scoring/scoring-visitor.h | 108
icing/scoring/bm25f-calculator.cc | 248
icing/scoring/bm25f-calculator.h | 177
icing/scoring/priority-queue-scored-document-hits-ranker.h | 128
icing/scoring/priority-queue-scored-document-hits-ranker_test.cc | 255
icing/scoring/ranker.cc | 132
icing/scoring/ranker.h | 25
icing/scoring/ranker_benchmark.cc | 4
icing/scoring/score-and-rank_benchmark.cc | 224
icing/scoring/scored-document-hit.cc | 30
icing/scoring/scored-document-hit.h | 80
icing/scoring/scored-document-hit_test.cc | 77
icing/scoring/scored-document-hits-ranker.h | 62
icing/scoring/scorer-factory.cc | 242
icing/scoring/scorer-factory.h | 49
icing/scoring/scorer-test-utils.h | 77
icing/scoring/scorer.cc | 100
icing/scoring/scorer.h | 36
icing/scoring/scorer_test.cc | 613
icing/scoring/scoring-processor.cc | 44
icing/scoring/scoring-processor.h | 19
icing/scoring/scoring-processor_test.cc | 876
icing/scoring/section-weights.cc | 151
icing/scoring/section-weights.h | 96
icing/scoring/section-weights_test.cc | 447
icing/store/corpus-associated-scoring-data.h | 79
icing/store/corpus-id.h | 32
icing/store/document-associated-score-data.h | 34
icing/store/document-filter-data.h | 1
icing/store/document-id.h | 7
icing/store/document-log-creator.cc | 205
icing/store/document-log-creator.h | 85
icing/store/document-store.cc | 1828
icing/store/document-store.h | 554
icing/store/document-store_benchmark.cc | 342
icing/store/document-store_test.cc | 4034
icing/store/dynamic-trie-key-mapper.h | 334
icing/store/dynamic-trie-key-mapper_test.cc | 67
icing/store/key-mapper.h | 265
icing/store/key-mapper_benchmark.cc | 323
icing/store/key-mapper_test.cc | 181
icing/store/namespace-fingerprint-identifier.cc | 73
icing/store/namespace-fingerprint-identifier.h | 72
icing/store/namespace-fingerprint-identifier_test.cc | 148
icing/store/namespace-id.h | 1
icing/store/persistent-hash-map-key-mapper.h | 206
icing/store/persistent-hash-map-key-mapper_test.cc | 52
icing/store/suggestion-result-checker-impl.h | 154
icing/store/suggestion-result-checker.h | 44
icing/store/usage-store.cc | 262
icing/store/usage-store.h | 205
icing/store/usage-store_test.cc | 628
icing/testing/always-false-suggestion-result-checker-impl.h | 36
icing/testing/always-true-suggestion-result-checker-impl.h | 36
icing/testing/common-matchers.cc | 124
icing/testing/common-matchers.h | 436
icing/testing/fake-clock.h | 25
icing/testing/fake-clock_test.cc | 13
icing/testing/hit-test-utils.cc | 59
icing/testing/hit-test-utils.h | 43
icing/testing/icu-data-file-helper.cc (renamed from icing/helpers/icu/icu-data-file-helper.cc) | 4
icing/testing/icu-data-file-helper.h (renamed from icing/helpers/icu/icu-data-file-helper.h) | 6
icing/testing/icu-i18n-test-utils.cc | 2
icing/testing/jni-test-helpers.h | 2
icing/testing/numeric/normal-distribution-number-generator.h | 42
icing/testing/numeric/number-generator.h | 39
icing/testing/numeric/uniform-distribution-integer-generator.h | 41
icing/testing/random-string.cc | 54
icing/testing/random-string.h | 32
icing/testing/random-string_test.cc | 54
icing/testing/schema-generator.h | 42
icing/testing/snippet-helpers.cc | 80
icing/testing/snippet-helpers.h | 60
icing/text_classifier/lib3/utils/base/logging.h | 1
icing/text_classifier/lib3/utils/base/statusor.h | 97
icing/text_classifier/lib3/utils/java/jni-base.cc | 6
icing/text_classifier/lib3/utils/java/jni-base.h | 4
icing/text_classifier/lib3/utils/java/jni-helper.h | 17
icing/tokenization/combined-tokenizer_test.cc | 262
icing/tokenization/icu/icu-language-segmenter-factory.cc | 5
icing/tokenization/icu/icu-language-segmenter.cc | 265
icing/tokenization/icu/icu-language-segmenter.h | 35
icing/tokenization/icu/icu-language-segmenter_test.cc | 942
icing/tokenization/language-segmenter-factory.h | 8
icing/tokenization/language-segmenter-iterator-test-jni-layer.cc | 37
icing/tokenization/language-segmenter-iterator_test.cc | 135
icing/tokenization/language-segmenter.h | 79
icing/tokenization/language-segmenter_benchmark.cc | 16
icing/tokenization/plain-tokenizer-test-jni-layer.cc | 36
icing/tokenization/plain-tokenizer.cc | 39
icing/tokenization/plain-tokenizer_test.cc | 472
icing/tokenization/raw-query-tokenizer.cc | 452
icing/tokenization/raw-query-tokenizer_test.cc | 677
icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc (renamed from icing/jni/reverse-jni-break-iterator.cc) | 6
icing/tokenization/reverse_jni/reverse-jni-break-iterator.h (renamed from icing/jni/reverse-jni-break-iterator.h) | 8
icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc | 4
icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc | 37
icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h | 46
icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc | 324
icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h | 2
icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc (renamed from icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc) | 601
icing/tokenization/rfc822-tokenizer.cc | 798
icing/tokenization/rfc822-tokenizer.h | 38
icing/tokenization/rfc822-tokenizer_test.cc | 992
icing/tokenization/simple/space-language-segmenter-factory.cc | 41
icing/tokenization/simple/space-language-segmenter.cc | 205
icing/tokenization/simple/space-language-segmenter.h | 58
icing/tokenization/simple/space-language-segmenter_test.cc | 114
icing/tokenization/token.h | 37
icing/tokenization/tokenizer-factory.cc | 23
icing/tokenization/tokenizer-factory.h | 2
icing/tokenization/tokenizer.h | 42
icing/tokenization/verbatim-tokenizer.cc | 144
icing/tokenization/verbatim-tokenizer.h | 41
icing/tokenization/verbatim-tokenizer_test.cc | 210
icing/tools/document-store-dump.cc | 119
icing/tools/document-store-dump.h | 35
icing/tools/icing-tool.cc | 306
icing/transform/icu/icu-normalizer.cc | 188
icing/transform/icu/icu-normalizer.h | 47
icing/transform/icu/icu-normalizer_benchmark.cc | 163
icing/transform/icu/icu-normalizer_test.cc | 193
icing/transform/map/map-normalizer.cc | 112
icing/transform/map/map-normalizer.h | 12
icing/transform/map/map-normalizer_benchmark.cc | 102
icing/transform/map/map-normalizer_test.cc | 99
icing/transform/map/normalization-map.cc | 26
icing/transform/map/normalization-map.h | 2
icing/transform/normalizer.h | 12
icing/transform/simple/none-normalizer-factory.cc | 53
icing/transform/simple/none-normalizer.h | 51
icing/transform/simple/none-normalizer_test.cc | 74
icing/util/bit-util.h | 82
icing/util/bit-util_test.cc | 145
icing/util/character-iterator.cc | 269
icing/util/character-iterator.h | 116
icing/util/character-iterator_test.cc | 266
icing/util/clock.cc | 19
icing/util/clock.h | 70
icing/util/crc32.h | 6
icing/util/data-loss.h | 36
icing/util/document-validator.cc | 33
icing/util/document-validator.h | 10
icing/util/document-validator_test.cc | 305
icing/util/encode-util.cc | 50
icing/util/encode-util.h | 45
icing/util/encode-util_test.cc | 91
icing/util/fingerprint-util.cc | 48
icing/util/fingerprint-util.h | 47
icing/util/fingerprint-util_test.cc | 75
icing/util/i18n-utils.cc | 25
icing/util/i18n-utils.h | 10
icing/util/logging.cc | 125
icing/util/logging.h | 140
icing/util/logging_raw.cc | 104
icing/util/logging_raw.h | 34
icing/util/logging_test.cc | 158
icing/util/math-util.h | 2
icing/util/snippet-helpers.cc | 94
icing/util/snippet-helpers.h | 60
icing/util/tokenized-document.cc | 92
icing/util/tokenized-document.h | 92
icing/util/tokenized-document_test.cc | 455
java/build.gradle | 88
java/src/com/google/android/icing/IcingSearchEngine.java | 438
java/src/com/google/android/icing/IcingSearchEngineImpl.java | 331
java/src/com/google/android/icing/IcingSearchEngineInterface.java | 148
java/src/com/google/android/icing/IcingSearchEngineUtils.java | 471
java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java | 666
lint-baseline.xml | 487
nativeLib/build.gradle | 43
proto/icing/index/numeric/wildcard-property-storage.proto | 22
proto/icing/proto/debug.proto | 137
proto/icing/proto/document.proto | 76
proto/icing/proto/document_wrapper.proto | 4
proto/icing/proto/initialize.proto | 105
proto/icing/proto/internal/optimize.proto | 29
proto/icing/proto/logging.proto | 364
proto/icing/proto/optimize.proto | 55
proto/icing/proto/persist.proto | 22
proto/icing/proto/schema.proto | 183
proto/icing/proto/scoring.proto | 115
proto/icing/proto/search.proto | 460
proto/icing/proto/status.proto | 11
proto/icing/proto/storage.proto | 187
proto/icing/proto/usage.proto | 69
synced_AOSP_CL_number.txt | 1
556 files changed, 163055 insertions, 20125 deletions
diff --git a/.gitignore b/.gitignore
index f57bd5e..962fbd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
# Files
*.iml
+*.cmake.gen
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
index 7982c4f..82b7b59 100644
--- a/Android.bp
+++ b/Android.bp
@@ -50,6 +50,8 @@ cc_defaults {
"-funsigned-char",
"-fvisibility=hidden",
+
+ "-Bsymbolic",
],
}
diff --git a/AndroidManifest.xml b/AndroidManifest.xml
deleted file mode 100644
index 7377c53..0000000
--- a/AndroidManifest.xml
+++ /dev/null
@@ -1,2 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<manifest package="com.google.android.icing" />
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0830783..4b7c752 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,9 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-cmake_minimum_required(VERSION 3.10.2)
+cmake_minimum_required(VERSION 3.22.1)
+
+project(icing)
add_definitions("-DICING_REVERSE_JNI_SEGMENTATION=1")
+set(VERSION_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/icing/jni.lds")
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--version-script=${VERSION_SCRIPT}")
set(
Protobuf_PREBUILTS_DIR
@@ -45,7 +51,7 @@ add_subdirectory("${Protobuf_SOURCE_DIR}/cmake" ${Protobuf_TARGET_BINARY_DIR})
# Compile libandroidicu
set(ICU_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../icu/libandroidicu")
set(ICU_TARGET_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/icu-target")
-add_subdirectory(${ICU_SOURCE_DIR} ${ICU_TARGET_BINARY_DIR})
+add_subdirectory("${ICU_SOURCE_DIR}/static_shim" ${ICU_TARGET_BINARY_DIR})
# Glob Icing proto sources. Results look like this: icing/proto/document.proto
file(
@@ -55,7 +61,10 @@ file(
"*.proto")
message(STATUS "Icing_PROTO_FILES=${Icing_PROTO_FILES}")
+
# Run protoc on Icing_PROTO_FILES to generate pb.cc and pb.h files
+# The DEPENDS section of add_custom_command could trigger a remake if any proto
+# source file has been updated.
file(MAKE_DIRECTORY ${Icing_PROTO_GEN_DIR})
foreach(FILE ${Icing_PROTO_FILES})
# Find the name of the proto file without the .proto extension
@@ -68,10 +77,10 @@ foreach(FILE ${Icing_PROTO_FILES})
"${Icing_PROTO_GEN_DIR}/${FILE_NOEXT}.pb.h"
COMMAND ${Protobuf_PROTOC_PATH}
--proto_path "${CMAKE_CURRENT_SOURCE_DIR}/proto"
- --cpp_out ${Icing_PROTO_GEN_DIR}
+ --cpp_out "lite:${Icing_PROTO_GEN_DIR}"
${FILE}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- DEPENDS ${Protobuf_PROTOC_PATH}
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/proto/${FILE}
)
endforeach()
message(STATUS "Icing_PROTO_SOURCES=${Icing_PROTO_SOURCES}")
@@ -89,6 +98,11 @@ file(
# Glob expressions
icing/*.cc icing/*.h
)
+
+# TODO(b/170611579): When supporting cmake v3.12 or higher, use CONFIGURE_DEPENDS
+# in the glob and remove this section.
+include(synced_AOSP_CL_number.txt)
+
# Exclude the same types of files as Android.bp. See the comments there.
list(FILTER Icing_CC_SOURCES EXCLUDE REGEX "^icing/.*[^a-zA-Z0-9]test[^a-zA-Z0-9].*$")
list(FILTER Icing_CC_SOURCES EXCLUDE REGEX "^icing/.*_benchmark\.cc$")
@@ -116,4 +130,4 @@ target_include_directories(icing PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(icing PRIVATE ${Icing_PROTO_GEN_DIR})
target_include_directories(icing PRIVATE "${Protobuf_SOURCE_DIR}/src")
target_include_directories(icing PRIVATE "${ICU_SOURCE_DIR}/include")
-target_link_libraries(icing protobuf::libprotobuf libandroidicu log)
+target_link_libraries(icing protobuf::libprotobuf-lite libandroidicu log z)
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..93c8e30
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,2 @@
+adorokhine@google.com
+tjbarron@google.com
diff --git a/build.gradle b/build.gradle
new file mode 100644
index 0000000..97cc5e1
--- /dev/null
+++ b/build.gradle
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import androidx.build.SdkHelperKt
+
+plugins {
+ id("AndroidXPlugin")
+ id("AndroidXRepackagePlugin")
+ id("java-library")
+ id("com.google.protobuf")
+}
+
+repackage {
+ // Must match what is in frameworks/support/appsearch/appsearch-external-protobuf/build.gradle
+ addRelocation {
+ sourcePackage = "com.google.protobuf"
+ targetPackage = "com.google.android.icing.protobuf"
+ }
+}
+
+sourceSets {
+ main {
+ java.srcDir 'java/src/'
+ proto.srcDir 'proto/'
+ }
+}
+
+dependencies {
+ compileOnly("androidx.annotation:annotation:1.1.0")
+ compileOnly(SdkHelperKt.getSdkDependency(project))
+ compileOnly(libs.protobufLite)
+}
+
+afterEvaluate {
+ lint {
+ lintOptions {
+ // protobuf generates unannotated methods
+ disable("UnknownNullness")
+ }
+ }
+}
+
+protobuf {
+ protoc {
+ artifact = libs.protobufCompiler.get()
+ }
+ generateProtoTasks {
+ all().each { task ->
+ task.builtins {
+ java {
+ option 'lite'
+ }
+ }
+ }
+ }
+}
+
+androidx {
+ mavenVersion = LibraryVersions.APPSEARCH
+}
diff --git a/icing/absl_ports/annotate.cc b/icing/absl_ports/annotate.cc
index d283e13..dfe5566 100644
--- a/icing/absl_ports/annotate.cc
+++ b/icing/absl_ports/annotate.cc
@@ -33,7 +33,7 @@ libtextclassifier3::Status Annotate(const libtextclassifier3::Status& s,
std::string new_msg =
(!s.error_message().empty())
- ? absl_ports::StrCat(s.error_message(), kErrorSeparator, msg)
+ ? absl_ports::StrCat(msg, kErrorSeparator, s.error_message())
: std::string(msg);
return libtextclassifier3::Status(s.CanonicalCode(), new_msg);
}
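
This reordering puts the newest annotation first and the original status message last. A standalone sketch of the resulting behavior (kErrorSeparator's actual value lives elsewhere in annotate.cc; "; " is assumed here purely for illustration):

    #include <string>
    #include <string_view>

    // Mirrors the new ordering: newest context first, original cause last.
    std::string AnnotateMessageSketch(std::string_view existing_msg,
                                      std::string_view new_msg) {
      if (existing_msg.empty()) return std::string(new_msg);
      return std::string(new_msg) + "; " + std::string(existing_msg);
    }
    // AnnotateMessageSketch("root cause", "while indexing")
    //   -> "while indexing; root cause"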
diff --git a/icing/absl_ports/arraysize_macros.h b/icing/absl_ports/arraysize_macros.h
new file mode 100644
index 0000000..e09c019
--- /dev/null
+++ b/icing/absl_ports/arraysize_macros.h
@@ -0,0 +1,40 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_ARRAYSIZE_MACROS_H_
+#define ICING_ABSL_PORTS_ARRAYSIZE_MACROS_H_
+
+#include <cstddef>
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// ABSL_PORT_ARRAYSIZE()
+//
+// Returns the number of elements in an array as a compile-time constant, which
+// can be used in defining new arrays. If you use this macro on a pointer by
+// mistake, you will get a compile-time error.
+#define ABSL_PORT_ARRAYSIZE(array) (sizeof(absl_ports::ArraySizeHelper(array)))
+
+// Note: this internal template function declaration is used by ABSL_PORT_ARRAYSIZE.
+// The function doesn't need a definition, as we only use its type.
+template <typename T, size_t N>
+auto ArraySizeHelper(const T (&array)[N]) -> char (&)[N];
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif  // ICING_ABSL_PORTS_ARRAYSIZE_MACROS_H_
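
A short usage sketch of the macro (hypothetical values, placed inside icing::lib so the absl_ports:: qualification inside the macro resolves):

    #include "icing/absl_ports/arraysize_macros.h"

    namespace icing {
    namespace lib {

    constexpr int kPrimes[] = {2, 3, 5, 7, 11};
    // ArraySizeHelper deduces N and returns char (&)[N], so sizeof(...) == N
    // and the whole expression is a compile-time constant.
    static_assert(ABSL_PORT_ARRAYSIZE(kPrimes) == 5, "bound is deduced");
    int counts[ABSL_PORT_ARRAYSIZE(kPrimes)] = {};  // usable as an array bound

    // const int* p = kPrimes;
    // ABSL_PORT_ARRAYSIZE(p);  // would not compile: no array bound to deduce

    }  // namespace lib
    }  // namespace icing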
diff --git a/icing/absl_ports/status_imports.h b/icing/absl_ports/ascii_str_to_lower.cc
index 3a97fd6..f181751 100644
--- a/icing/absl_ports/status_imports.h
+++ b/icing/absl_ports/ascii_str_to_lower.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Google LLC
+// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,21 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_ABSL_PORTS_STATUS_IMPORTS_H_
-#define ICING_ABSL_PORTS_STATUS_IMPORTS_H_
+#include "icing/absl_ports/ascii_str_to_lower.h"
-#include "icing/text_classifier/lib3/utils/base/status.h"
+#include <cctype>
+#include <string>
namespace icing {
namespace lib {
namespace absl_ports {
-// TODO(b/144458732) Delete this file once visibility on TC3 Status has been
-// granted to the sample app.
-using Status = libtextclassifier3::Status;
+void AsciiStrToLower(std::string* s) {
+ for (auto& ch : *s) {
+ ch = std::tolower(static_cast<unsigned char>(ch));
+ }
+}
} // namespace absl_ports
} // namespace lib
} // namespace icing
-
-#endif // ICING_ABSL_PORTS_STATUS_IMPORTS_H_
diff --git a/icing/absl_ports/ascii_str_to_lower.h b/icing/absl_ports/ascii_str_to_lower.h
new file mode 100644
index 0000000..0233fa8
--- /dev/null
+++ b/icing/absl_ports/ascii_str_to_lower.h
@@ -0,0 +1,39 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_ASCII_STR_TO_LOWER_H_
+#define ICING_ABSL_PORTS_ASCII_STR_TO_LOWER_H_
+
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// Converts the characters in `s` to lowercase, changing the contents of `s`.
+void AsciiStrToLower(std::string* s);
+
+// Creates a lowercase string from a given std::string_view.
+inline std::string AsciiStrToLower(std::string_view s) {
+ std::string result(s);
+ AsciiStrToLower(&result);
+ return result;
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif  // ICING_ABSL_PORTS_ASCII_STR_TO_LOWER_H_
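
The in-place overload in the .cc casts each character through unsigned char before std::tolower, since passing a negative plain char (possible for non-ASCII input where char is signed) is undefined behavior. A brief usage sketch of the two overloads:

    #include <string>
    #include "icing/absl_ports/ascii_str_to_lower.h"

    void LowercaseExample() {
      std::string s = "Hello WORLD";
      icing::lib::absl_ports::AsciiStrToLower(&s);  // in place: "hello world"
      // The string_view overload returns a lowered copy instead:
      std::string t =
          icing::lib::absl_ports::AsciiStrToLower("MiXeD");  // "mixed"
    }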
diff --git a/icing/absl_ports/status_test.cc b/icing/absl_ports/status_test.cc
new file mode 100644
index 0000000..1909302
--- /dev/null
+++ b/icing/absl_ports/status_test.cc
@@ -0,0 +1,53 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+TEST(StatusTest, StatusOrOfProtoConstructorTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or = status_or;
+}
+
+TEST(StatusTest, StatusOrOfProtoMoveConstructorTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or =
+ std::move(status_or);
+}
+
+TEST(StatusTest, StatusOrOfProtoAssignmentTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or;
+ new_status_or = status_or;
+}
+
+TEST(StatusTest, StatusOrOfProtoMoveAssignmentTest) {
+ libtextclassifier3::StatusOr<DocumentProto> status_or =
+ absl_ports::InvalidArgumentError("test");
+ libtextclassifier3::StatusOr<DocumentProto> new_status_or;
+ new_status_or = std::move(status_or);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/absl_ports/str_cat.cc b/icing/absl_ports/str_cat.cc
index 2cf020d..8695824 100644
--- a/icing/absl_ports/str_cat.cc
+++ b/icing/absl_ports/str_cat.cc
@@ -175,9 +175,8 @@ void StrAppendPieces(std::string* dest, std::vector<std::string_view> pieces) {
for (std::string_view s : pieces) {
result_size += s.length();
}
- // Create result with enough room to fit all operands.
- std::string result;
- result.__resize_default_init(result_size);
+ // Resize dest with enough room to fit all operands.
+ dest->__resize_default_init(result_size);
char* out = &(*dest)[old_size];
for (std::string_view s : pieces) {
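
The bug fixed here: the old code grew a local string, result, while the subsequent writes went through a pointer into dest's unchanged, too-small buffer. A self-contained sketch of the corrected pattern, with plain resize standing in for the libc++-specific __resize_default_init:

    #include <cstring>
    #include <string>
    #include <string_view>
    #include <vector>

    void StrAppendPiecesSketch(std::string* dest,
                               std::vector<std::string_view> pieces) {
      size_t old_size = dest->size();
      size_t result_size = old_size;
      for (std::string_view s : pieces) result_size += s.length();
      dest->resize(result_size);       // grow dest itself, not a local copy
      char* out = &(*dest)[old_size];  // now points into valid storage
      for (std::string_view s : pieces) {
        std::memcpy(out, s.data(), s.length());
        out += s.length();
      }
    }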
diff --git a/icing/absl_ports/str_join.cc b/icing/absl_ports/str_join.cc
new file mode 100644
index 0000000..2d105ca
--- /dev/null
+++ b/icing/absl_ports/str_join.cc
@@ -0,0 +1,41 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/absl_ports/str_join.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+std::vector<std::string_view> StrSplit(std::string_view text,
+ std::string_view sep) {
+ std::vector<std::string_view> substrings;
+ size_t separator_position = text.find(sep);
+ size_t current_start = 0;
+ size_t current_end = separator_position;
+ while (separator_position != std::string_view::npos) {
+ substrings.push_back(
+ text.substr(current_start, current_end - current_start));
+ current_start = current_end + sep.length();
+ separator_position = text.find(sep, current_start);
+ current_end = separator_position;
+ }
+ current_end = text.length();
+ substrings.push_back(text.substr(current_start, current_end - current_start));
+ return substrings;
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
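
Note that the loop always pushes one final piece after the last separator, so StrSplit never returns an empty vector, and the returned string_views alias the input buffer. A usage sketch:

    #include <string>
    #include <string_view>
    #include <vector>
    #include "icing/absl_ports/str_join.h"

    void SplitExample() {
      std::string csv = "a,b,";  // must outlive the returned views
      std::vector<std::string_view> parts =
          icing::lib::absl_ports::StrSplit(csv, ",");
      // parts == {"a", "b", ""}: a trailing separator yields an empty piece,
      // and an empty input yields {""} rather than {}.
    }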
diff --git a/icing/absl_ports/str_join.h b/icing/absl_ports/str_join.h
index 7c8936a..5277bca 100644
--- a/icing/absl_ports/str_join.h
+++ b/icing/absl_ports/str_join.h
@@ -17,6 +17,7 @@
#include <string>
#include <string_view>
+#include <vector>
#include "icing/absl_ports/str_cat.h"
@@ -92,6 +93,11 @@ std::string StrJoin(Iterator first, Iterator last, std::string_view sep,
return result;
}
+template <typename Iterator>
+std::string StrJoin(Iterator first, Iterator last, std::string_view sep) {
+ return absl_ports::StrJoin(first, last, sep, DefaultFormatter());
+}
+
template <typename Container, typename Formatter>
std::string StrJoin(const Container& container, std::string_view sep,
Formatter&& formatter) {
@@ -104,8 +110,11 @@ std::string StrJoin(const Container& container, std::string_view sep) {
return absl_ports::StrJoin(container, sep, DefaultFormatter());
}
+std::vector<std::string_view> StrSplit(std::string_view text,
+ std::string_view sep);
+
} // namespace absl_ports
} // namespace lib
} // namespace icing
-#endif // ICING_ABSL_PORTS_STR_JOIN_H_
+#endif  // ICING_ABSL_PORTS_STR_JOIN_H_
\ No newline at end of file
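
The added iterator-range overload lets callers join a sub-range without spelling out DefaultFormatter. A sketch pairing it with the StrSplit declared in this header:

    #include <string>
    #include <string_view>
    #include <vector>
    #include "icing/absl_ports/str_join.h"

    void JoinExample() {
      std::vector<std::string_view> parts =
          icing::lib::absl_ports::StrSplit("x|y|z", "|");
      // New overload: no explicit formatter argument required.
      std::string tail =
          icing::lib::absl_ports::StrJoin(parts.begin() + 1, parts.end(), "|");
      // tail == "y|z"
    }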
diff --git a/icing/document-builder.h b/icing/document-builder.h
index 4c95b89..44500f9 100644
--- a/icing/document-builder.h
+++ b/icing/document-builder.h
@@ -71,11 +71,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& ClearCustomProperties() {
- document_.clear_custom_properties();
- return *this;
- }
-
// Takes a property name and any number of string values.
template <typename... V>
DocumentBuilder& AddStringProperty(std::string property_name,
@@ -83,26 +78,25 @@ class DocumentBuilder {
return AddStringProperty(std::move(property_name), {string_values...});
}
- // Takes a custom property name and any number of string values.
- template <typename... V>
- DocumentBuilder& AddCustomStringProperty(std::string property_name,
- V... string_values) {
- return AddCustomStringProperty(std::move(property_name),
- {string_values...});
+ // Takes a property name and iterator of int64_t values.
+ template <typename InputIt>
+ DocumentBuilder& AddInt64Property(std::string property_name, InputIt first,
+ InputIt last) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (InputIt it = first; it != last; ++it) {
+ property->mutable_int64_values()->Add(*it);
+ }
+ return *this;
}
// Takes a property name and any number of int64_t values.
template <typename... V>
DocumentBuilder& AddInt64Property(std::string property_name,
V... int64_values) {
- return AddInt64Property(std::move(property_name), {int64_values...});
- }
-
- // Takes a custom property name and any number of int64_t values.
- template <typename... V>
- DocumentBuilder& AddCustomInt64Property(std::string property_name,
- V... int64_values) {
- return AddCustomInt64Property(std::move(property_name), {int64_values...});
+ std::initializer_list<int64_t> int64_values_list = {int64_values...};
+ return AddInt64Property(std::move(property_name), int64_values_list.begin(),
+ int64_values_list.end());
}
// Takes a property name and any number of double values.
@@ -112,14 +106,6 @@ class DocumentBuilder {
return AddDoubleProperty(std::move(property_name), {double_values...});
}
- // Takes a custom property name and any number of double values.
- template <typename... V>
- DocumentBuilder& AddCustomDoubleProperty(std::string property_name,
- V... double_values) {
- return AddCustomDoubleProperty(std::move(property_name),
- {double_values...});
- }
-
// Takes a property name and any number of boolean values.
template <typename... V>
DocumentBuilder& AddBooleanProperty(std::string property_name,
@@ -127,28 +113,12 @@ class DocumentBuilder {
return AddBooleanProperty(std::move(property_name), {boolean_values...});
}
- // Takes a custom property name and any number of boolean values.
- template <typename... V>
- DocumentBuilder& AddCustomBooleanProperty(std::string property_name,
- V... boolean_values) {
- return AddCustomBooleanProperty(std::move(property_name),
- {boolean_values...});
- }
-
// Takes a property name and any number of bytes values.
template <typename... V>
DocumentBuilder& AddBytesProperty(std::string property_name,
V... bytes_values) {
return AddBytesProperty(std::move(property_name), {bytes_values...});
}
-
- // Takes a custom property name and any number of bytes values.
- template <typename... V>
- DocumentBuilder& AddCustomBytesProperty(std::string property_name,
- V... bytes_values) {
- return AddCustomBytesProperty(std::move(property_name), {bytes_values...});
- }
-
// Takes a property name and any number of document values.
template <typename... V>
DocumentBuilder& AddDocumentProperty(std::string property_name,
@@ -156,14 +126,6 @@ class DocumentBuilder {
return AddDocumentProperty(std::move(property_name), {document_values...});
}
- // Takes a custom property name and any number of document values.
- template <typename... V>
- DocumentBuilder& AddCustomDocumentProperty(std::string property_name,
- V&&... document_values) {
- return AddCustomDocumentProperty(std::move(property_name),
- {document_values...});
- }
-
DocumentProto Build() const { return document_; }
private:
@@ -180,37 +142,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomStringProperty(
- std::string property_name,
- std::initializer_list<std::string_view> string_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (std::string_view string_value : string_values) {
- custom_property->mutable_string_values()->Add(std::string(string_value));
- }
- return *this;
- }
-
- DocumentBuilder& AddInt64Property(
- std::string property_name, std::initializer_list<int64_t> int64_values) {
- auto property = document_.add_properties();
- property->set_name(std::move(property_name));
- for (int64_t int64_value : int64_values) {
- property->mutable_int64_values()->Add(int64_value);
- }
- return *this;
- }
-
- DocumentBuilder& AddCustomInt64Property(
- std::string property_name, std::initializer_list<int64_t> int64_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (int64_t int64_value : int64_values) {
- custom_property->mutable_int64_values()->Add(int64_value);
- }
- return *this;
- }
-
DocumentBuilder& AddDoubleProperty(
std::string property_name, std::initializer_list<double> double_values) {
auto property = document_.add_properties();
@@ -221,16 +152,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomDoubleProperty(
- std::string property_name, std::initializer_list<double> double_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (double double_value : double_values) {
- custom_property->mutable_double_values()->Add(double_value);
- }
- return *this;
- }
-
DocumentBuilder& AddBooleanProperty(
std::string property_name, std::initializer_list<bool> boolean_values) {
auto property = document_.add_properties();
@@ -241,16 +162,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomBooleanProperty(
- std::string property_name, std::initializer_list<bool> boolean_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (bool boolean_value : boolean_values) {
- custom_property->mutable_boolean_values()->Add(boolean_value);
- }
- return *this;
- }
-
DocumentBuilder& AddBytesProperty(
std::string property_name,
std::initializer_list<std::string> bytes_values) {
@@ -262,17 +173,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomBytesProperty(
- std::string property_name,
- std::initializer_list<std::string> bytes_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (const std::string& bytes_value : bytes_values) {
- custom_property->mutable_bytes_values()->Add(std::string(bytes_value));
- }
- return *this;
- }
-
DocumentBuilder& AddDocumentProperty(
std::string property_name,
std::initializer_list<DocumentProto> document_values) {
@@ -283,18 +183,6 @@ class DocumentBuilder {
}
return *this;
}
-
- DocumentBuilder& AddCustomDocumentProperty(
- std::string property_name,
- std::initializer_list<DocumentProto> document_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (DocumentProto document_value : document_values) {
- custom_property->mutable_document_values()->Add(
- std::move(document_value));
- }
- return *this;
- }
};
} // namespace lib
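
A usage sketch of the new iterator-range AddInt64Property next to the unchanged variadic overload, which now delegates through the same code path. SetKey and SetSchema are existing DocumentBuilder setters; the schema and property names below are made up for the example:

    #include <cstdint>
    #include <vector>
    #include "icing/document-builder.h"

    icing::lib::DocumentProto MakeScoredDoc(
        const std::vector<int64_t>& scores) {
      return icing::lib::DocumentBuilder()
          .SetKey("namespace", "uri/1")
          .SetSchema("ScoreType")
          // New overload: copy values from any iterator range.
          .AddInt64Property("scores", scores.begin(), scores.end())
          // Variadic overload, routed through the iterator version.
          .AddInt64Property("version", 2)
          .Build();
    }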
diff --git a/icing/file/destructible-directory.h b/icing/file/destructible-directory.h
new file mode 100644
index 0000000..9a8bd4b
--- /dev/null
+++ b/icing/file/destructible-directory.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_
+#define ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_
+
+#include "icing/file/filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// A convenient RAII class which will recursively create the directory at the
+// specified file path and delete it upon destruction.
+class DestructibleDirectory {
+ public:
+ explicit DestructibleDirectory(const Filesystem* filesystem, std::string dir)
+ : filesystem_(filesystem), dir_(std::move(dir)) {
+ is_valid_ = filesystem_->CreateDirectoryRecursively(dir_.c_str());
+ }
+
+ DestructibleDirectory(const DestructibleDirectory&) = delete;
+ DestructibleDirectory& operator=(const DestructibleDirectory&) = delete;
+
+ DestructibleDirectory(DestructibleDirectory&& rhs)
+ : filesystem_(nullptr), is_valid_(false) {
+ Swap(rhs);
+ }
+
+ DestructibleDirectory& operator=(DestructibleDirectory&& rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ ~DestructibleDirectory() {
+ if (filesystem_ != nullptr &&
+ !filesystem_->DeleteDirectoryRecursively(dir_.c_str())) {
+ // Swallow deletion failures as there's nothing actionable to do about
+ // them.
+ ICING_LOG(WARNING) << "Unable to delete temporary directory: " << dir_;
+ }
+ }
+
+ const std::string& dir() const { return dir_; }
+
+ bool is_valid() const { return is_valid_; }
+
+ private:
+ void Swap(DestructibleDirectory& other) {
+ std::swap(filesystem_, other.filesystem_);
+ std::swap(dir_, other.dir_);
+ std::swap(is_valid_, other.is_valid_);
+ }
+
+ const Filesystem* filesystem_;
+ std::string dir_;
+ bool is_valid_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_DESTRUCTIBLE_DIRECTORY_H_
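
Because moves are swap-based, a moved-from object carries the previous directory and deletes it on destruction, which the tests below rely on. A usage sketch of the RAII contract (the path is illustrative):

    #include "icing/file/destructible-directory.h"
    #include "icing/file/filesystem.h"

    void UseScratchDir(const icing::lib::Filesystem& filesystem) {
      icing::lib::DestructibleDirectory scratch(
          &filesystem, "/data/local/tmp/icing-scratch");
      if (!scratch.is_valid()) return;  // recursive creation failed
      // ... write temporary files under scratch.dir() ...
    }  // destructor removes the whole tree, even on early return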
diff --git a/icing/file/destructible-directory_test.cc b/icing/file/destructible-directory_test.cc
new file mode 100644
index 0000000..dae74ff
--- /dev/null
+++ b/icing/file/destructible-directory_test.cc
@@ -0,0 +1,118 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/destructible-directory.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(DestructibleDirectoryTest, DeletesDirectoryProperly) {
+ Filesystem filesystem;
+ std::string dir_path = GetTestTempDir() + "/dir1";
+ std::string file_path = dir_path + "/file1";
+
+ {
+ // 1. Create a file in the directory.
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(dir_path.c_str()));
+ ScopedFd sfd(filesystem.OpenForWrite(file_path.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int i = 127;
+ ASSERT_TRUE(filesystem.Write(sfd.get(), &i, sizeof(i)));
+ }
+
+ {
+ // 2. Open the directory with a DestructibleDirectory
+ DestructibleDirectory destructible(&filesystem, dir_path);
+ EXPECT_TRUE(destructible.is_valid());
+ EXPECT_THAT(destructible.dir(), Eq(dir_path));
+ }
+
+ // 3. Ensure that the file and directory don't exist.
+ EXPECT_FALSE(filesystem.FileExists(file_path.c_str()));
+ EXPECT_FALSE(filesystem.DirectoryExists(dir_path.c_str()));
+}
+
+TEST(DestructibleDirectoryTest, MoveAssignDeletesDirectoryProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/dir1";
+ std::string filepath2 = GetTestTempDir() + "/dir2";
+
+ // 1. Create dir1
+ DestructibleDirectory destructible1(&filesystem, filepath1);
+ ASSERT_TRUE(destructible1.is_valid());
+ ASSERT_TRUE(filesystem.DirectoryExists(filepath1.c_str()));
+
+ {
+ // 2. Create dir2
+ DestructibleDirectory destructible2(&filesystem, filepath2);
+ ASSERT_TRUE(destructible2.is_valid());
+
+ // Move assign destructible2 into destructible1
+ destructible1 = std::move(destructible2);
+ }
+
+ // 3. dir1 shouldn't exist because it was destroyed when destructible1 was
+ // move assigned to.
+ EXPECT_FALSE(filesystem.DirectoryExists(filepath1.c_str()));
+
+ // 4. dir2 should still exist because it moved into destructible1 from
+ // destructible2.
+ EXPECT_TRUE(filesystem.DirectoryExists(filepath2.c_str()));
+}
+
+TEST(DestructibleDirectoryTest, MoveConstructionDeletesDirectoryProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/dir1";
+
+  // 1. Declare destructible1; it'll be assigned soon anyway.
+ std::unique_ptr<DestructibleDirectory> destructible1;
+ {
+    // 2. Create dir1
+ DestructibleDirectory destructible2(&filesystem, filepath1);
+ ASSERT_TRUE(destructible2.is_valid());
+
+ // Move construct destructible1 from destructible2
+ destructible1 =
+ std::make_unique<DestructibleDirectory>(std::move(destructible2));
+ }
+
+ // 3. dir1 should still exist because it moved into destructible1 from
+ // destructible2.
+ EXPECT_TRUE(destructible1->is_valid());
+ EXPECT_TRUE(filesystem.DirectoryExists(filepath1.c_str()));
+
+ {
+ // 4. Move construct destructible3 from destructible1
+ DestructibleDirectory destructible3(std::move(*destructible1));
+ EXPECT_TRUE(destructible3.is_valid());
+ }
+
+ // 5. dir1 shouldn't exist because it was destroyed when destructible3 was
+ // destroyed.
+ EXPECT_FALSE(filesystem.DirectoryExists(filepath1.c_str()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
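One subtlety these move tests pin down: because move assignment is implemented as a swap, the destination's old directory is not deleted at the moment of assignment, only when the moved-from object is destroyed. A short sketch under that assumption (paths illustrative):

DestructibleDirectory a(&filesystem, "/tmp/old");
{
  DestructibleDirectory b(&filesystem, "/tmp/new");
  a = std::move(b);  // a now owns /tmp/new; b holds /tmp/old.
}                    // b destroyed here: /tmp/old is deleted.
// /tmp/new survives until a itself is destroyed.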
diff --git a/icing/file/destructible-file.h b/icing/file/destructible-file.h
new file mode 100644
index 0000000..006dcb4
--- /dev/null
+++ b/icing/file/destructible-file.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_DESTRUCTIBLE_FILE_H_
+#define ICING_FILE_DESTRUCTIBLE_FILE_H_
+
+#include <unistd.h>
+
+#include <string>
+
+#include "icing/file/filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// A convenient RAII class that opens the specified file for writing and
+// deletes the underlying file upon destruction.
+class DestructibleFile {
+ public:
+ explicit DestructibleFile(const std::string& filepath,
+ const Filesystem* filesystem)
+ : filesystem_(filesystem), filepath_(filepath) {
+ fd_ = filesystem_->OpenForWrite(filepath_.c_str());
+ }
+
+ DestructibleFile(const DestructibleFile&) = delete;
+ DestructibleFile(DestructibleFile&& other) : filesystem_(nullptr), fd_(-1) {
+ *this = std::move(other);
+ }
+
+ DestructibleFile& operator=(const DestructibleFile&) = delete;
+ DestructibleFile& operator=(DestructibleFile&& other) {
+ std::swap(fd_, other.fd_);
+ std::swap(filesystem_, other.filesystem_);
+ std::swap(filepath_, other.filepath_);
+ return *this;
+ }
+
+ ~DestructibleFile() {
+ if (is_valid()) {
+ close(fd_);
+ if (!filesystem_->DeleteFile(filepath_.c_str())) {
+ ICING_VLOG(1) << "Failed to delete file " << filepath_;
+ }
+ }
+ }
+
+ bool is_valid() const { return fd_ >= 0; }
+ int get_fd() const { return fd_; }
+
+ private:
+ const Filesystem* filesystem_;
+ std::string filepath_;
+ int fd_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_DESTRUCTIBLE_FILE_H_
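Usage mirrors DestructibleDirectory, except that the constructor takes the path first and the open descriptor is exposed via get_fd(). A minimal sketch (path and function name are illustrative):

void WriteThrowawayData(const Filesystem* filesystem) {
  DestructibleFile tmp("/data/local/tmp/throwaway", filesystem);
  if (!tmp.is_valid()) {
    return;  // OpenForWrite failed.
  }
  int value = 42;
  filesystem->Write(tmp.get_fd(), &value, sizeof(value));
}  // tmp goes out of scope here: the fd is closed and the file deleted.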
diff --git a/icing/file/destructible-file_test.cc b/icing/file/destructible-file_test.cc
new file mode 100644
index 0000000..61316d1
--- /dev/null
+++ b/icing/file/destructible-file_test.cc
@@ -0,0 +1,117 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/destructible-file.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+TEST(DestructibleFileTest, DeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/file1";
+
+ {
+ // 1. Create the file
+ ScopedFd sfd(filesystem.OpenForWrite(filepath1.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int i = 127;
+ ASSERT_TRUE(filesystem.Write(sfd.get(), &i, sizeof(i)));
+ }
+
+ {
+ // 2. Open with a Destructible file.
+ DestructibleFile destructible(filepath1, &filesystem);
+ ASSERT_TRUE(destructible.is_valid());
+ }
+
+ // 3. Ensure that the file doesn't exist.
+ EXPECT_FALSE(filesystem.FileExists(filepath1.c_str()));
+}
+
+TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/file1";
+ std::string filepath2 = GetTestTempDir() + "/file2";
+
+ // 1. Create file1
+ DestructibleFile destructible1(filepath1, &filesystem);
+ ASSERT_TRUE(destructible1.is_valid());
+ int i = 127;
+ ASSERT_TRUE(filesystem.Write(destructible1.get_fd(), &i, sizeof(i)));
+
+ {
+ // 2. Create file2
+ DestructibleFile destructible2(filepath2, &filesystem);
+ ASSERT_TRUE(destructible2.is_valid());
+ i = 458;
+ ASSERT_TRUE(filesystem.Write(destructible2.get_fd(), &i, sizeof(i)));
+
+ // Move assign destructible2 into destructible1
+ destructible1 = std::move(destructible2);
+ }
+
+ // 3. file1 shouldn't exist because it was destroyed when destructible1 was
+ // move assigned to.
+ EXPECT_FALSE(filesystem.FileExists(filepath1.c_str()));
+
+ // 4. file2 should still exist because it moved into destructible1 from
+ // destructible2.
+ EXPECT_TRUE(filesystem.FileExists(filepath2.c_str()));
+}
+
+TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/file1";
+
+  // 1. Declare destructible1; it'll be assigned soon anyway.
+ std::unique_ptr<DestructibleFile> destructible1;
+ {
+ // 2. Create file1
+ DestructibleFile destructible2(filepath1, &filesystem);
+ ASSERT_TRUE(destructible2.is_valid());
+ int i = 458;
+ ASSERT_TRUE(filesystem.Write(destructible2.get_fd(), &i, sizeof(i)));
+
+ // Move construct destructible1 from destructible2
+ destructible1 =
+ std::make_unique<DestructibleFile>(std::move(destructible2));
+ }
+
+ // 3. file1 should still exist because it moved into destructible1 from
+ // destructible2.
+ ASSERT_TRUE(destructible1->is_valid());
+ EXPECT_TRUE(filesystem.FileExists(filepath1.c_str()));
+
+ {
+ // 4. Move construct destructible3 from destructible1
+ DestructibleFile destructible3(std::move(*destructible1));
+ ASSERT_TRUE(destructible3.is_valid());
+ }
+
+ // 5. file1 shouldn't exist because it was destroyed when destructible3 was
+ // destroyed.
+ EXPECT_FALSE(filesystem.FileExists(filepath1.c_str()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-bitmap.cc b/icing/file/file-backed-bitmap.cc
index f1e568c..bdcfc79 100644
--- a/icing/file/file-backed-bitmap.cc
+++ b/icing/file/file-backed-bitmap.cc
@@ -47,10 +47,14 @@ FileBackedBitmap::Create(const Filesystem* filesystem,
"mmap strategy.");
}
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapper,
+ MemoryMappedFile::Create(*filesystem, file_path, mmap_strategy));
+
auto bitmap = std::unique_ptr<FileBackedBitmap>(
- new FileBackedBitmap(filesystem, file_path, mmap_strategy));
+ new FileBackedBitmap(filesystem, file_path, std::move(mmapper)));
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = bitmap->Initialize();
if (!status.ok()) {
@@ -62,10 +66,10 @@ FileBackedBitmap::Create(const Filesystem* filesystem,
FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
std::string_view file_path,
- MemoryMappedFile::Strategy mmap_strategy)
+ MemoryMappedFile&& mmapper)
: filesystem_(filesystem),
file_path_(file_path),
- mmapper_(new MemoryMappedFile(*filesystem, file_path, mmap_strategy)) {}
+ mmapper_(std::make_unique<MemoryMappedFile>(std::move(mmapper))) {}
FileBackedBitmap::~FileBackedBitmap() {
// Only update if we have auto_sync setup, otherwise the checksum will be
@@ -122,7 +126,7 @@ libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() {
<< " of size: " << file_size;
}
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = mmapper_->Remap(0, file_size);
if (!status.ok()) {
@@ -198,7 +202,7 @@ int FileBackedBitmap::NumBits() const {
libtextclassifier3::Status FileBackedBitmap::Set(int bit_index,
bool bit_value) {
if (bit_index >= NumBits()) {
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = GrowTo(bit_index);
if (!status.ok()) {
@@ -261,7 +265,7 @@ libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
file_path_.c_str(), new_file_size));
}
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
if (!status.ok()) {
@@ -269,8 +273,8 @@ libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
return status;
}
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Grew file %s to new size %zd", file_path_.c_str(), new_file_size);
+ ICING_VLOG(1) << "Grew file " << file_path_ << " to new size "
+ << new_file_size;
mutable_header()->state = Header::ChecksumState::kStale;
return libtextclassifier3::Status::OK;
}
@@ -281,7 +285,7 @@ libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) {
}
const size_t new_file_size = FileSizeForBits(new_num_bits);
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
if (!status.ok()) {
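The bitmap change above follows the broader migration in this snapshot: MemoryMappedFile is no longer constructed directly but obtained from a fallible Create() factory, so mmap setup errors surface as a Status rather than a half-initialized object. A sketch of the calling pattern (function name and sizes are illustrative):

libtextclassifier3::Status ReadMappedRegion(const Filesystem& filesystem,
                                            const std::string& file_path) {
  ICING_ASSIGN_OR_RETURN(
      MemoryMappedFile mmapped_file,
      MemoryMappedFile::Create(filesystem, file_path,
                               MemoryMappedFile::Strategy::READ_ONLY));
  ICING_RETURN_IF_ERROR(mmapped_file.Remap(0, 1024));  // offset, size
  // ... read through mmapped_file.region() ...
  return libtextclassifier3::Status::OK;
}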
diff --git a/icing/file/file-backed-bitmap.h b/icing/file/file-backed-bitmap.h
index e3d98ad..beba14e 100644
--- a/icing/file/file-backed-bitmap.h
+++ b/icing/file/file-backed-bitmap.h
@@ -175,8 +175,9 @@ class FileBackedBitmap {
Header* mutable_header();
// Use FileBackedBitmap::Create() to instantiate.
- FileBackedBitmap(const Filesystem* filesystem, std::string_view file_path,
- MemoryMappedFile::Strategy mmap_strategy);
+ explicit FileBackedBitmap(const Filesystem* filesystem,
+ std::string_view file_path,
+ MemoryMappedFile&& mmapper);
// Verify the contents of the bitmap and get ready for read/write operations.
//
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 62943b8..095f832 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -14,16 +14,14 @@
// File-backed log of protos with append-only writes and position based reads.
//
-// There should only be one instance of a FileBackedProtoLog of the same file at
-// a time; using multiple instances at the same time may lead to undefined
-// behavior.
+// The implementation in this file is deprecated and replaced by
+// portable-file-backed-proto-log.h.
//
-// The entire checksum is computed on initialization to verify the contents are
-// valid. On failure, the log will be truncated to the last verified state when
-// PersistToDisk() was called. If the log cannot successfully restore the last
-// state due to disk corruption or some other inconsistency, then the entire log
-// will be lost.
+// This deprecated implementation has been made read-only for the purposes of
+// migration; writing and erasing this format of log is no longer supported and
+// the methods to accomplish this have been removed.
//
+// The details of the format are as follows:
// Each proto written to the file will have a metadata written just before it.
// The metadata consists of
// {
@@ -31,49 +29,30 @@
// 3 bytes of the proto size
// n bytes of the proto itself
// }
-//
-// Example usage:
-// ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
-// FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path_,
-// options));
-// auto proto_log = create_result.proto_log;
-//
-// Document document;
-// document.set_namespace("com.google.android.example");
-// document.set_uri("www.google.com");
-//
-// int64_t document_offset = proto_log->WriteProto(document));
-// Document same_document = proto_log->ReadProto(document_offset));
-// proto_log->PersistToDisk();
-//
// TODO(b/136514769): Add versioning to the header and a UpgradeToVersion
// migration method.
-
#ifndef ICING_FILE_FILE_BACKED_PROTO_LOG_H_
#define ICING_FILE_FILE_BACKED_PROTO_LOG_H_
-#include <cstddef>
#include <cstdint>
-#include <cstring>
#include <memory>
#include <string>
#include <string_view>
-#include <utility>
-#include <vector>
-#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include <google/protobuf/io/gzip_stream.h>
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/gzip_stream.h"
+#include "icing/portable/platform.h"
#include "icing/portable/zlib.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -110,10 +89,6 @@ class FileBackedProtoLog {
// Header stored at the beginning of the file before the rest of the log
// contents. Stores metadata on the log.
- //
- // TODO(b/139375388): Migrate the Header struct to a proto. This makes
- // migrations easier since we don't need to worry about different size padding
- // (which would affect the checksum) and different endians.
struct Header {
static constexpr int32_t kMagic = 0xf4c6f67a;
@@ -151,11 +126,15 @@ class FileBackedProtoLog {
// A successfully initialized log.
std::unique_ptr<FileBackedProtoLog<ProtoT>> proto_log;
- // Whether there was some data loss while initializing from a previous
- // state. This can happen if the file is corrupted or some previously added
- // data was unpersisted. This may be used to signal that any derived data
- // off of the proto log may need to be regenerated.
- bool data_loss;
+ // The data status after initializing from a previous state. Data loss can
+ // happen if the file is corrupted or some previously added data was
+ // unpersisted. This may be used to signal that any derived data off of the
+ // proto log may need to be regenerated.
+ DataLoss data_loss;
+
+ bool has_data_loss() {
+ return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE;
+ }
};
// Factory method to create, initialize, and return a FileBackedProtoLog. Will
@@ -166,9 +145,10 @@ class FileBackedProtoLog {
// log saves these checkpointed "good" states when PersistToDisk() is called
// or the log is safely destructed. If the log rewinds successfully to the
// last-good state, then the returned CreateResult.data_loss indicates
- // there was some data loss so that any derived data may know that it
- // needs to be updated. If the log re-initializes successfully without any
- // data loss, the boolean will be false.
+  // whether there was data loss and, if so, what kind it was (partial or
+  // complete), so that any derived data may know that it needs to be updated.
+  // If the log re-initializes successfully without any data loss,
+  // CreateResult.data_loss will be NONE.
//
// Params:
// filesystem: Handles system level calls
@@ -188,45 +168,15 @@ class FileBackedProtoLog {
FileBackedProtoLog(const FileBackedProtoLog&) = delete;
FileBackedProtoLog& operator=(const FileBackedProtoLog&) = delete;
- // This will update the checksum of the log as well.
- ~FileBackedProtoLog();
-
- // Writes the serialized proto to the underlying file. Writes are applied
- // directly to the underlying file. Users do not need to sync the file after
- // writing.
- //
- // Returns:
- // Offset of the newly appended proto in file on success
- // INVALID_ARGUMENT if proto is too large, as decided by
- // Options.max_proto_size
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> WriteProto(const ProtoT& proto);
-
// Reads out a proto located at file_offset from the file.
//
// Returns:
// A proto on success
+ // NOT_FOUND if the proto at the given offset has been erased
// OUT_OF_RANGE_ERROR if file_offset exceeds file size
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const;
- // Calculates and returns the disk usage in bytes. Rounds up to the nearest
- // block size.
- //
- // Returns:
- // Disk usage on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
-
- // Returns the file size of all the elements held in the log. File size is in
- // bytes. This excludes the size of any internal metadata of the log, e.g. the
- // log's header.
- //
- // Returns:
- // File size on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
-
// An iterator helping to find offsets of all the protos in file.
// Example usage:
//
@@ -236,10 +186,11 @@ class FileBackedProtoLog {
// }
class Iterator {
public:
- Iterator(const Filesystem& filesystem, const std::string& file_path,
- int64_t initial_offset);
+ explicit Iterator(const Filesystem& filesystem,
+ const std::string& file_path, int64_t initial_offset,
+ MemoryMappedFile&& mmapped_file);
- // Advances to the position of next proto.
+  // Advances to the position of the next proto, whether or not it has been
+  // erased.
//
// Returns:
// OK on success
@@ -263,73 +214,7 @@ class FileBackedProtoLog {
// Returns an iterator of current proto log. The caller needs to keep the
// proto log unchanged while using the iterator, otherwise unexpected
// behaviors could happen.
- Iterator GetIterator();
-
- // Persists all changes since initialization or the last call to
- // PersistToDisk(). Any changes that aren't persisted may be lost if the
- // system fails to close safely.
- //
- // Example use case:
- //
- // Document document;
- // document.set_namespace("com.google.android.example");
- // document.set_uri("www.google.com");
- //
- // {
- // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- // FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path,
- // options));
- // auto proto_log = std::move(create_result.proto_log);
- //
- // int64_t document_offset = proto_log->WriteProto(document));
- //
- // // We lose the document here since it wasn't persisted.
- // // *SYSTEM CRASH*
- // }
- //
- // {
- // // Can still successfully create after a crash since the log can
- // // rewind/truncate to recover into a previously good state
- // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- // FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path,
- // options));
- // auto proto_log = std::move(create_result.proto_log);
- //
- // // Lost the proto since we didn't PersistToDisk before the crash
- // proto_log->ReadProto(document_offset)); // INVALID_ARGUMENT error
- //
- // int64_t document_offset = proto_log->WriteProto(document));
- //
- // // Persisted this time, so we should be ok.
- // ICING_ASSERT_OK(proto_log->PersistToDisk());
- // }
- //
- // {
- // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- // FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path,
- // options));
- // auto proto_log = std::move(create_result.proto_log);
- //
- // // SUCCESS
- // Document same_document = proto_log->ReadProto(document_offset));
- // }
- //
- // NOTE: Since all protos are already written to the file directly, this
- // just updates the checksum and rewind position. Without these updates,
- // future initializations will truncate the file and discard unpersisted
- // changes.
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::Status PersistToDisk();
-
- // Calculates the checksum of the log contents. Excludes the header content.
- //
- // Returns:
- // Crc of the log content
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+ libtextclassifier3::StatusOr<Iterator> GetIterator();
private:
// Object can only be instantiated via the ::Create factory.
@@ -339,7 +224,7 @@ class FileBackedProtoLog {
// Initializes a new proto log.
//
// Returns:
- // std::unique_ptr<FileBackedProtoLog> that can be used immediately
+  //   CreateResult on success
// INTERNAL_ERROR on IO error
static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile(
const Filesystem* filesystem, const std::string& file_path,
@@ -350,7 +235,7 @@ class FileBackedProtoLog {
// content will be lost.
//
// Returns:
- // std::unique_ptr<FileBackedProtoLog> that can be used immediately
+  //   CreateResult on success
// INTERNAL_ERROR on IO error or internal inconsistencies in the file
// INVALID_ARGUMENT_ERROR if options aren't consistent with previous
// instances
@@ -369,6 +254,28 @@ class FileBackedProtoLog {
const Filesystem* filesystem, const std::string& file_path,
Crc32 initial_crc, int64_t start, int64_t end);
+ static bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+ }
+
+ // Helper function to get stored proto size from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+ // Helper function to get stored proto magic from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
+ // Reads out the metadata of a proto located at file_offset from the file.
+ //
+ // Returns:
+ // Proto's metadata on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
+ // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
+ static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
+ MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
+
// Magic number added in front of every proto. Used when reading out protos
// as a first check for corruption in each entry in the file. Even if there is
// a corruption, the best we can do is roll back to our last recovery point
@@ -386,31 +293,17 @@ class FileBackedProtoLog {
static_assert(kMaxProtoSize <= 0x00FFFFFF,
"kMaxProtoSize doesn't fit in 3 bytes");
- // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
- static constexpr int kDeflateCompressionLevel = 3;
-
// Chunks of the file to mmap at a time, so we don't mmap the entire file.
- static constexpr int kMmapChunkSize = 4 * 1024;
+  // Only used on 32-bit devices.
+ static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
ScopedFd fd_;
const Filesystem* const filesystem_;
const std::string file_path_;
-
- // Reads out the metadata of a proto located at file_offset from the file.
- //
- // Returns:
- // Proto's metadata on success
- // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
- // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
- static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
- MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
std::unique_ptr<Header> header_;
};
template <typename ProtoT>
-constexpr uint8_t FileBackedProtoLog<ProtoT>::kProtoMagic;
-
-template <typename ProtoT>
FileBackedProtoLog<ProtoT>::FileBackedProtoLog(const Filesystem* filesystem,
const std::string& file_path,
std::unique_ptr<Header> header)
@@ -421,15 +314,6 @@ FileBackedProtoLog<ProtoT>::FileBackedProtoLog(const Filesystem* filesystem,
}
template <typename ProtoT>
-FileBackedProtoLog<ProtoT>::~FileBackedProtoLog() {
- if (!PersistToDisk().ok()) {
- ICING_LOG(WARNING)
- << "Error persisting to disk during destruction of FileBackedProtoLog: "
- << file_path_;
- }
-}
-
-template <typename ProtoT>
libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
FileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
const std::string& file_path,
@@ -485,7 +369,7 @@ FileBackedProtoLog<ProtoT>::InitializeNewFile(const Filesystem* filesystem,
std::unique_ptr<FileBackedProtoLog<ProtoT>>(
new FileBackedProtoLog<ProtoT>(filesystem, file_path,
std::move(header))),
- /*data_loss=*/false};
+ /*data_loss=*/DataLoss::NONE};
return create_result;
}
@@ -535,15 +419,15 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
}
header->max_proto_size = options.max_proto_size;
- bool data_loss = false;
+ DataLoss data_loss = DataLoss::NONE;
ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
ComputeChecksum(filesystem, file_path, Crc32(),
sizeof(Header), file_size));
+
// Double check that the log checksum is the same as the one that was
// persisted last time. If not, we start recovery logic.
if (header->log_checksum != calculated_log_checksum.Get()) {
- // Need to rewind the proto log since the checksums don't match
- data_loss = true;
+ // Need to rewind the proto log since the checksums don't match.
// Worst case, we have to rewind the entire log back to just the header
int64_t last_known_good = sizeof(Header);
@@ -559,10 +443,12 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
// Check if it matches our last rewind state. If so, this becomes our last
// good state and we can safely truncate and recover from here.
last_known_good = header->rewind_offset;
+ data_loss = DataLoss::PARTIAL;
} else {
// Otherwise, we're going to truncate the entire log and this resets the
// checksum to an empty log state.
header->log_checksum = 0;
+ data_loss = DataLoss::COMPLETE;
}
if (!filesystem->Truncate(file_path.c_str(), last_known_good)) {
@@ -570,8 +456,8 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
absl_ports::StrCat("Error truncating file: ", file_path));
}
- ICING_LOG(INFO) << "Truncated '" << file_path << "' to size "
- << last_known_good;
+ ICING_LOG(WARNING) << "Truncated '" << file_path << "' to size "
+ << last_known_good;
}
CreateResult create_result = {
@@ -587,8 +473,10 @@ template <typename ProtoT>
libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum(
const Filesystem* filesystem, const std::string& file_path,
Crc32 initial_crc, int64_t start, int64_t end) {
- auto mmapped_file = MemoryMappedFile(*filesystem, file_path,
- MemoryMappedFile::Strategy::READ_ONLY);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem, file_path,
+ MemoryMappedFile::Strategy::READ_ONLY));
Crc32 new_crc(initial_crc.Get());
if (start < 0) {
@@ -598,6 +486,14 @@ libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum(
file_path.c_str(), static_cast<long long>(start)));
}
+ if (end < start) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Ending checksum offset of file '%s' must be greater than start "
+ "'%lld', was '%lld'",
+ file_path.c_str(), static_cast<long long>(start),
+ static_cast<long long>(end)));
+ }
+
int64_t file_size = filesystem->GetFileSize(file_path.c_str());
if (end > file_size) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
@@ -607,103 +503,57 @@ libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum(
static_cast<long long>(end)));
}
- for (int i = start; i < end; i += kMmapChunkSize) {
- // Don't read past the file size.
- int next_chunk_size = kMmapChunkSize;
- if ((i + kMmapChunkSize) >= end) {
- next_chunk_size = end - i;
+ Architecture architecture = GetArchitecture();
+ switch (architecture) {
+ case Architecture::BIT_64: {
+ // Don't mmap in chunks here since mmapping can be harmful on 64-bit
+ // devices where mmap/munmap calls need the mmap write semaphore, which
+ // blocks mmap/munmap/mprotect and all page faults from executing while
+      // they run. On 64-bit devices, this doesn't actually load into memory;
+      // it just makes the file faultable, so mapping the whole file should be
+      // ok.
+ // b/185822878.
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start));
+ auto mmap_str = std::string_view(mmapped_file.region(), end - start);
+ new_crc.Append(mmap_str);
+ break;
}
-
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
-
- auto mmap_str = std::string_view(mmapped_file.region(), next_chunk_size);
- new_crc.Append(mmap_str);
- }
-
- return new_crc;
-}
-
-template <typename ProtoT>
-libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::WriteProto(
- const ProtoT& proto) {
- int64_t proto_size = proto.ByteSizeLong();
- int32_t metadata;
- int metadata_size = sizeof(metadata);
- int64_t current_position = filesystem_->GetCurrentPosition(fd_.get());
-
- if (proto_size > header_->max_proto_size) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "proto_size, %lld, was too large to write. Max is %d",
- static_cast<long long>(proto_size), header_->max_proto_size));
- }
-
- // At this point, we've guaranteed that proto_size is under kMaxProtoSize (see
- // ::Create), so we can safely store it in an int.
- int final_size = 0;
-
- std::string proto_str;
- google::protobuf::io::StringOutputStream proto_stream(&proto_str);
-
- if (header_->compress) {
- google::protobuf::io::GzipOutputStream::Options options;
- options.format = google::protobuf::io::GzipOutputStream::ZLIB;
- options.compression_level = kDeflateCompressionLevel;
-
- google::protobuf::io::GzipOutputStream compressing_stream(&proto_stream,
- options);
-
- bool success = proto.SerializeToZeroCopyStream(&compressing_stream) &&
- compressing_stream.Close();
-
- if (!success) {
- return absl_ports::InternalError("Error compressing proto.");
+ case Architecture::BIT_32:
+ [[fallthrough]];
+ case Architecture::UNKNOWN: {
+      // 32-bit devices only have 4GiB of address space. Mmap in chunks so we
+      // don't use up too much of it at once. If the architecture is unknown,
+      // also chunk it, since we're not sure what the device can handle.
+ for (int i = start; i < end; i += kMmapChunkSize) {
+ // Don't read past the file size.
+ int next_chunk_size = kMmapChunkSize;
+ if ((i + kMmapChunkSize) >= end) {
+ next_chunk_size = end - i;
+ }
+
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
+
+ auto mmap_str =
+ std::string_view(mmapped_file.region(), next_chunk_size);
+ new_crc.Append(mmap_str);
+ }
+ break;
}
-
- final_size = proto_str.size();
-
- // In case the compressed proto is larger than the original proto, we also
- // can't write it.
- if (final_size > header_->max_proto_size) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Compressed proto size, %d, was greater than "
- "max_proto_size, %d",
- final_size, header_->max_proto_size));
- }
- } else {
- // Serialize the proto directly into the write buffer at an offset of the
- // metadata.
- proto.SerializeToZeroCopyStream(&proto_stream);
- final_size = proto_str.size();
- }
-
- // 1st byte for magic, next 3 bytes for proto size.
- metadata = (kProtoMagic << 24) | final_size;
-
- // Actually write metadata, has to be done after we know the possibly
- // compressed proto size
- if (!filesystem_->Write(fd_.get(), &metadata, metadata_size)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to write proto metadata to: ", file_path_));
- }
-
- // Write the serialized proto
- if (!filesystem_->Write(fd_.get(), proto_str.data(), proto_str.size())) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to write proto to: ", file_path_));
}
- return current_position;
+ return new_crc;
}
template <typename ProtoT>
libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
int64_t file_offset) const {
int64_t file_size = filesystem_->GetFileSize(fd_.get());
- MemoryMappedFile mmapped_file(*filesystem_, file_path_,
- MemoryMappedFile::Strategy::READ_ONLY);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY));
if (file_offset >= file_size) {
- // file_size points to the next byte to write at, so subtract one to get the
- // inclusive, actual size of file.
+ // file_size points to the next byte to write at, so subtract one to get
+ // the inclusive, actual size of file.
return absl_ports::OutOfRangeError(
IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
"out of range of the file size, %lld",
@@ -716,17 +566,22 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size));
// Copy out however many bytes it says the proto is
- int stored_size = metadata & 0x00FFFFFF;
+ int stored_size = GetProtoSize(metadata);
ICING_RETURN_IF_ERROR(
mmapped_file.Remap(file_offset + sizeof(metadata), stored_size));
- google::protobuf::io::ArrayInputStream proto_stream(
- mmapped_file.mutable_region(), stored_size);
+
+ if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) {
+ return absl_ports::NotFoundError("The proto data has been erased.");
+ }
+
+ google::protobuf::io::ArrayInputStream proto_stream(mmapped_file.mutable_region(),
+ stored_size);
// Deserialize proto
ProtoT proto;
if (header_->compress) {
- google::protobuf::io::GzipInputStream decompress_stream(&proto_stream);
+ protobuf_ports::GzipInputStream decompress_stream(&proto_stream);
proto.ParseFromZeroCopyStream(&decompress_stream);
} else {
proto.ParseFromZeroCopyStream(&proto_stream);
@@ -736,32 +591,11 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
}
template <typename ProtoT>
-libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::GetDiskUsage()
- const {
- int64_t size = filesystem_->GetDiskUsage(file_path_.c_str());
- if (size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError("Failed to get disk usage of proto log");
- }
- return size;
-}
-
-template <typename ProtoT>
-libtextclassifier3::StatusOr<int64_t>
-FileBackedProtoLog<ProtoT>::GetElementsFileSize() const {
- int64_t total_file_size = filesystem_->GetFileSize(file_path_.c_str());
- if (total_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get file size of elments in the proto log");
- }
- return total_file_size - sizeof(Header);
-}
-
-template <typename ProtoT>
FileBackedProtoLog<ProtoT>::Iterator::Iterator(const Filesystem& filesystem,
const std::string& file_path,
- int64_t initial_offset)
- : mmapped_file_(filesystem, file_path,
- MemoryMappedFile::Strategy::READ_ONLY),
+ int64_t initial_offset,
+ MemoryMappedFile&& mmapped_file)
+ : mmapped_file_(std::move(mmapped_file)),
initial_offset_(initial_offset),
current_offset_(kInvalidOffset),
file_size_(filesystem.GetFileSize(file_path.c_str())) {
@@ -781,8 +615,7 @@ libtextclassifier3::Status FileBackedProtoLog<ProtoT>::Iterator::Advance() {
ICING_ASSIGN_OR_RETURN(
int metadata,
ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_));
- int proto_size = metadata & 0x00FFFFFF;
- current_offset_ += sizeof(metadata) + proto_size;
+ current_offset_ += sizeof(metadata) + GetProtoSize(metadata);
}
if (current_offset_ < file_size_) {
@@ -801,9 +634,14 @@ int64_t FileBackedProtoLog<ProtoT>::Iterator::GetOffset() {
}
template <typename ProtoT>
-typename FileBackedProtoLog<ProtoT>::Iterator
+libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::Iterator>
FileBackedProtoLog<ProtoT>::GetIterator() {
- return Iterator(*filesystem_, file_path_, /*initial_offset=*/sizeof(Header));
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY));
+ return Iterator(*filesystem_, file_path_,
+ /*initial_offset=*/sizeof(Header), std::move(mmapped_file));
}
template <typename ProtoT>
@@ -829,7 +667,7 @@ libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata(
ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size));
memcpy(&metadata, mmapped_file->region(), metadata_size);
// Checks magic number
- uint8_t stored_k_proto_magic = metadata >> 24;
+ uint8_t stored_k_proto_magic = GetProtoMagic(metadata);
if (stored_k_proto_magic != kProtoMagic) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
"Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic,
@@ -838,50 +676,6 @@ libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata(
return metadata;
}
-template <typename ProtoT>
-libtextclassifier3::Status FileBackedProtoLog<ProtoT>::PersistToDisk() {
- int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
- if (file_size == header_->rewind_offset) {
- // No changes made, don't need to update the checksum.
- return libtextclassifier3::Status::OK;
- }
-
- int64_t new_content_size = file_size - header_->rewind_offset;
- Crc32 crc;
- if (new_content_size < 0) {
- // File shrunk, recalculate the entire checksum.
- ICING_ASSIGN_OR_RETURN(
- crc, ComputeChecksum(filesystem_, file_path_, Crc32(), sizeof(Header),
- file_size));
- } else {
- // Append new changes to the existing checksum.
- ICING_ASSIGN_OR_RETURN(
- crc,
- ComputeChecksum(filesystem_, file_path_, Crc32(header_->log_checksum),
- header_->rewind_offset, file_size));
- }
-
- header_->log_checksum = crc.Get();
- header_->rewind_offset = file_size;
- header_->header_checksum = header_->CalculateHeaderChecksum();
-
- if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
- sizeof(Header))) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to update header to: ", file_path_));
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-template <typename ProtoT>
-libtextclassifier3::StatusOr<Crc32>
-FileBackedProtoLog<ProtoT>::ComputeChecksum() {
- return FileBackedProtoLog<ProtoT>::ComputeChecksum(
- filesystem_, file_path_, Crc32(), /*start=*/sizeof(Header),
- /*end=*/filesystem_->GetFileSize(file_path_.c_str()));
-}
-
} // namespace lib
} // namespace icing
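Putting the surviving read-only surface together: Create() now reports a DataLoss value checkable via has_data_loss(), GetIterator() is fallible, and ReadProto() returns NOT_FOUND for erased (zeroed-out) entries. A sketch of draining a legacy log during migration, assuming the IsNotFound predicate from absl_ports/canonical_errors.h (function name and error handling are illustrative):

libtextclassifier3::Status DrainLegacyLog(
    FileBackedProtoLog<DocumentProto>* proto_log) {
  ICING_ASSIGN_OR_RETURN(auto iterator, proto_log->GetIterator());
  while (iterator.Advance().ok()) {
    auto proto_or = proto_log->ReadProto(iterator.GetOffset());
    if (absl_ports::IsNotFound(proto_or.status())) {
      continue;  // This entry was erased; skip it.
    }
    ICING_RETURN_IF_ERROR(proto_or.status());
    // ... append proto_or.ValueOrDie() to the replacement portable log ...
  }
  return libtextclassifier3::Status::OK;
}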
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
deleted file mode 100644
index 26e0fb0..0000000
--- a/icing/file/file-backed-proto-log_benchmark.cc
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <cstdint>
-#include <random>
-
-#include "testing/base/public/benchmark.h"
-#include "gmock/gmock.h"
-#include "icing/document-builder.h"
-#include "icing/file/file-backed-proto-log.h"
-#include "icing/file/filesystem.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/proto/document.pb.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/testing/random-string.h"
-#include "icing/testing/tmp-directory.h"
-
-// go/microbenchmarks
-//
-// To build and run on a local machine:
-// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
-// icing/file:file-backed-proto-log_benchmark
-//
-// $ blaze-bin/icing/file/file-backed-proto-log_benchmark
-// --benchmarks=all
-//
-//
-// To build and run on an Android device (must be connected and rooted):
-// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
-// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
-// icing/file:file-backed-proto-log_benchmark
-//
-// $ adb root
-//
-// $ adb push
-// blaze-bin/icing/file/file-backed-proto-log_benchmark
-// /data/local/tmp/
-//
-// $ adb shell /data/local/tmp/file-backed-proto-log-benchmark
-// --benchmarks=all
-
-namespace icing {
-namespace lib {
-
-namespace {
-
-static void BM_Write(benchmark::State& state) {
- const Filesystem filesystem;
- int string_length = state.range(0);
- const std::string file_path = IcingStringUtil::StringPrintf(
- "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
- int max_proto_size = (1 << 24) - 1; // 16 MiB
- bool compress = true;
-
- // Make sure it doesn't already exist.
- filesystem.DeleteFile(file_path.c_str());
-
- auto proto_log =
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem, file_path,
- FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
- .ValueOrDie()
- .proto_log;
-
- DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
-
- std::default_random_engine random;
- const std::string rand_str =
- RandomString(kAlNumAlphabet, string_length, &random);
-
- auto document_properties = document.add_properties();
- document_properties->set_name("string property");
- document_properties->add_string_values(rand_str);
-
- for (auto _ : state) {
- testing::DoNotOptimize(proto_log->WriteProto(document));
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
- string_length);
-
- // Cleanup after ourselves
- filesystem.DeleteFile(file_path.c_str());
-}
-BENCHMARK(BM_Write)
- ->Arg(1)
- ->Arg(32)
- ->Arg(512)
- ->Arg(1024)
- ->Arg(4 * 1024)
- ->Arg(8 * 1024)
- ->Arg(16 * 1024)
- ->Arg(32 * 1024)
- ->Arg(256 * 1024)
- ->Arg(2 * 1024 * 1024)
- ->Arg(8 * 1024 * 1024)
- ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is
- // 16MiB, and we need some extra space for the
- // rest of the document properties
-
-static void BM_Read(benchmark::State& state) {
- const Filesystem filesystem;
- int string_length = state.range(0);
- const std::string file_path = IcingStringUtil::StringPrintf(
- "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
- int max_proto_size = (1 << 24) - 1; // 16 MiB
- bool compress = true;
-
- // Make sure it doesn't already exist.
- filesystem.DeleteFile(file_path.c_str());
-
- auto proto_log =
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem, file_path,
- FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
- .ValueOrDie()
- .proto_log;
-
- DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
-
- std::default_random_engine random;
- const std::string rand_str =
- RandomString(kAlNumAlphabet, string_length, &random);
-
- auto document_properties = document.add_properties();
- document_properties->set_name("string property");
- document_properties->add_string_values(rand_str);
-
- ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
- proto_log->WriteProto(document));
-
- for (auto _ : state) {
- testing::DoNotOptimize(proto_log->ReadProto(write_offset));
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
- string_length);
-
- // Cleanup after ourselves
- filesystem.DeleteFile(file_path.c_str());
-}
-BENCHMARK(BM_Read)
- ->Arg(1)
- ->Arg(32)
- ->Arg(512)
- ->Arg(1024)
- ->Arg(4 * 1024)
- ->Arg(8 * 1024)
- ->Arg(16 * 1024)
- ->Arg(32 * 1024)
- ->Arg(256 * 1024)
- ->Arg(2 * 1024 * 1024)
- ->Arg(8 * 1024 * 1024)
- ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is
- // 16MiB, and we need some extra space for the
- // rest of the document properties
-
-} // namespace
-} // namespace lib
-} // namespace icing
diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc
index 3a9060d..eccb0c7 100644
--- a/icing/file/file-backed-proto-log_test.cc
+++ b/icing/file/file-backed-proto-log_test.cc
@@ -19,10 +19,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/file/mock-filesystem.h"
-#include "icing/portable/equals-proto.h"
#include "icing/proto/document.pb.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -32,14 +29,7 @@ namespace lib {
namespace {
-using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::A;
-using ::testing::Eq;
-using ::testing::Gt;
-using ::testing::Not;
using ::testing::NotNull;
-using ::testing::Pair;
-using ::testing::Return;
class FileBackedProtoLogTest : public ::testing::Test {
protected:
@@ -48,7 +38,10 @@ class FileBackedProtoLogTest : public ::testing::Test {
// https://stackoverflow.com/a/47368753
FileBackedProtoLogTest() {}
- void SetUp() override { file_path_ = GetTestTempDir() + "/proto_log"; }
+ void SetUp() override {
+ file_path_ = GetTestTempDir() + "/proto_log";
+ filesystem_.DeleteFile(file_path_.c_str());
+ }
void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
@@ -74,7 +67,7 @@ TEST_F(FileBackedProtoLogTest, Initialize) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
EXPECT_THAT(create_result.proto_log, NotNull());
- EXPECT_FALSE(create_result.data_loss);
+ EXPECT_FALSE(create_result.has_data_loss());
// Can't recreate the same file with different options.
ASSERT_THAT(FileBackedProtoLog<DocumentProto>::Create(
@@ -84,193 +77,6 @@ TEST_F(FileBackedProtoLogTest, Initialize) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(FileBackedProtoLogTest, WriteProtoTooLarge) {
- int max_proto_size = 1;
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
-
- // Proto is too large for the max_proto_size_in
- ASSERT_THAT(proto_log->WriteProto(document),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST_F(FileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Write a proto
- DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset,
- proto_log->WriteProto(document));
-
- // The 4 bytes of metadata that just doesn't have the same kProtoMagic
- // specified in file-backed-proto-log.h
- uint32_t wrong_magic = 0x7E000000;
-
- // Sanity check that we opened the file correctly
- int fd = filesystem_.OpenForWrite(file_path_.c_str());
- ASSERT_GT(fd, 0);
-
- // Write the wrong kProtoMagic in, kProtoMagics are stored at the beginning of
- // a proto entry.
- filesystem_.PWrite(fd, file_offset, &wrong_magic, sizeof(wrong_magic));
-
- ASSERT_THAT(proto_log->ReadProto(file_offset),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
-TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) {
- int last_offset;
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/false, max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Write the first proto
- DocumentProto document1 =
- DocumentBuilder().SetKey("namespace1", "uri1").Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(int written_position,
- proto_log->WriteProto(document1));
-
- int document1_offset = written_position;
-
- // Check that what we read is what we wrote
- ASSERT_THAT(proto_log->ReadProto(written_position),
- IsOkAndHolds(EqualsProto(document1)));
-
- // Write a second proto that's close to the max size. Leave some room for
- // the rest of the proto properties.
- std::string long_str(max_proto_size_ - 1024, 'a');
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .AddStringProperty("long_str", long_str)
- .Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(written_position,
- proto_log->WriteProto(document2));
-
- int document2_offset = written_position;
- last_offset = written_position;
- ASSERT_GT(document2_offset, document1_offset);
-
- // Check the second proto
- ASSERT_THAT(proto_log->ReadProto(written_position),
- IsOkAndHolds(EqualsProto(document2)));
-
- ICING_ASSERT_OK(proto_log->PersistToDisk());
- }
-
- {
- // Make a new proto_log with the same file_path, and make sure we
- // can still write to the same underlying file.
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/false, max_proto_size_)));
- auto recreated_proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Write a third proto
- DocumentProto document3 =
- DocumentBuilder().SetKey("namespace3", "uri3").Build();
-
- ASSERT_THAT(recreated_proto_log->WriteProto(document3),
- IsOkAndHolds(Gt(last_offset)));
- }
-}
-
-TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) {
- int last_offset;
-
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/true, max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Write the first proto
- DocumentProto document1 =
- DocumentBuilder().SetKey("namespace1", "uri1").Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(int written_position,
- proto_log->WriteProto(document1));
-
- int document1_offset = written_position;
-
- // Check that what we read is what we wrote
- ASSERT_THAT(proto_log->ReadProto(written_position),
- IsOkAndHolds(EqualsProto(document1)));
-
- // Write a second proto that's close to the max size. Leave some room for
- // the rest of the proto properties.
- std::string long_str(max_proto_size_ - 1024, 'a');
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .AddStringProperty("long_str", long_str)
- .Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(written_position,
- proto_log->WriteProto(document2));
-
- int document2_offset = written_position;
- last_offset = written_position;
- ASSERT_GT(document2_offset, document1_offset);
-
- // Check the second proto
- ASSERT_THAT(proto_log->ReadProto(written_position),
- IsOkAndHolds(EqualsProto(document2)));
-
- ICING_ASSERT_OK(proto_log->PersistToDisk());
- }
-
- {
- // Make a new proto_log with the same file_path, and make sure we
- // can still write to the same underlying file.
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(
- /*compress_in=*/true, max_proto_size_)));
- auto recreated_proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Write a third proto
- DocumentProto document3 =
- DocumentBuilder().SetKey("namespace3", "uri3").Build();
-
- ASSERT_THAT(recreated_proto_log->WriteProto(document3),
- IsOkAndHolds(Gt(last_offset)));
- }
-}
-
TEST_F(FileBackedProtoLogTest, CorruptHeader) {
{
ICING_ASSERT_OK_AND_ASSIGN(
@@ -280,7 +86,7 @@ TEST_F(FileBackedProtoLogTest, CorruptHeader) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto recreated_proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ EXPECT_FALSE(create_result.has_data_loss());
int corrupt_offset =
offsetof(FileBackedProtoLog<DocumentProto>::Header, rewind_offset);
@@ -300,220 +106,6 @@ TEST_F(FileBackedProtoLogTest, CorruptHeader) {
}
}
-TEST_F(FileBackedProtoLogTest, CorruptContent) {
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- DocumentProto document =
- DocumentBuilder().SetKey("namespace1", "uri1").Build();
-
- // Write and persist an document.
- ICING_ASSERT_OK_AND_ASSIGN(int document_offset,
- proto_log->WriteProto(document));
- ICING_ASSERT_OK(proto_log->PersistToDisk());
-
- // "Corrupt" the content written in the log.
- document.set_uri("invalid");
- std::string serialized_document = document.SerializeAsString();
- filesystem_.PWrite(file_path_.c_str(), document_offset,
- serialized_document.data(), serialized_document.size());
- }
-
- {
- // We can recover, but we have data loss.
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- ASSERT_TRUE(create_result.data_loss);
-
- // Lost everything in the log since the rewind position doesn't help if
- // there's been data corruption within the persisted region
- ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
- sizeof(FileBackedProtoLog<DocumentProto>::Header));
- }
-}
-
-TEST_F(FileBackedProtoLogTest, PersistToDisk) {
- DocumentProto document1 =
- DocumentBuilder().SetKey("namespace1", "uri1").Build();
- DocumentProto document2 =
- DocumentBuilder().SetKey("namespace2", "uri2").Build();
- int document1_offset, document2_offset;
- int log_size;
-
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Write and persist the first proto
- ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
- proto_log->WriteProto(document1));
- ICING_ASSERT_OK(proto_log->PersistToDisk());
-
- // Write, but don't explicitly persist the second proto
- ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
- proto_log->WriteProto(document2));
-
- // Check that what we read is what we wrote
- ASSERT_THAT(proto_log->ReadProto(document1_offset),
- IsOkAndHolds(EqualsProto(document1)));
- ASSERT_THAT(proto_log->ReadProto(document2_offset),
- IsOkAndHolds(EqualsProto(document2)));
-
- log_size = filesystem_.GetFileSize(file_path_.c_str());
- ASSERT_GT(log_size, 0);
- }
-
- {
- // The header rewind position and checksum aren't updated in this "system
- // crash" scenario.
-
- std::string bad_proto =
- "some incomplete proto that we didn't finish writing before the system "
- "crashed";
- filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(),
- bad_proto.size());
-
- // Double check that we actually wrote something to the underlying file
- ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size);
- }
-
- {
- // We can recover, but we have data loss
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- ASSERT_TRUE(create_result.data_loss);
-
- // Check that everything was persisted across instances
- ASSERT_THAT(proto_log->ReadProto(document1_offset),
- IsOkAndHolds(EqualsProto(document1)));
- ASSERT_THAT(proto_log->ReadProto(document2_offset),
- IsOkAndHolds(EqualsProto(document2)));
-
- // We correctly rewound to the last good state.
- ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str()));
- }
-}
-
-TEST_F(FileBackedProtoLogTest, Iterator) {
- DocumentProto document1 =
- DocumentBuilder().SetKey("namespace", "uri1").Build();
- DocumentProto document2 =
- DocumentBuilder().SetKey("namespace", "uri2").Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- {
- // Empty iterator
- auto iterator = proto_log->GetIterator();
- ASSERT_THAT(iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
- }
-
- {
- // Iterates through some documents
- ICING_ASSERT_OK(proto_log->WriteProto(document1));
- ICING_ASSERT_OK(proto_log->WriteProto(document2));
- auto iterator = proto_log->GetIterator();
- // 1st proto
- ICING_ASSERT_OK(iterator.Advance());
- ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
- IsOkAndHolds(EqualsProto(document1)));
- // 2nd proto
- ICING_ASSERT_OK(iterator.Advance());
- ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
- IsOkAndHolds(EqualsProto(document2)));
- // Tries to advance
- ASSERT_THAT(iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
- }
-
- {
- // Iterator with bad filesystem
- MockFilesystem mock_filesystem;
- ON_CALL(mock_filesystem, GetFileSize(A<const char *>()))
- .WillByDefault(Return(Filesystem::kBadFileSize));
- FileBackedProtoLog<DocumentProto>::Iterator bad_iterator(
- mock_filesystem, file_path_, /*initial_offset=*/0);
- ASSERT_THAT(bad_iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
- }
-}
-
-TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
- DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
- Crc32 checksum;
-
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- ICING_EXPECT_OK(proto_log->WriteProto(document));
-
- ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->ComputeChecksum());
-
- // Calling it twice with no changes should get us the same checksum
- EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
- }
-
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- FileBackedProtoLog<DocumentProto>::CreateResult create_result,
- FileBackedProtoLog<DocumentProto>::Create(
- &filesystem_, file_path_,
- FileBackedProtoLog<DocumentProto>::Options(compress_,
- max_proto_size_)));
- auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
-
- // Checksum should be consistent across instances
- EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
-
- // PersistToDisk shouldn't affect the checksum value
- ICING_EXPECT_OK(proto_log->PersistToDisk());
- EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
-
- // Check that modifying the log leads to a different checksum
- ICING_EXPECT_OK(proto_log->WriteProto(document));
- EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
- }
-}
-
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h
index aede8de..8c5743b 100644
--- a/icing/file/file-backed-proto.h
+++ b/icing/file/file-backed-proto.h
@@ -22,6 +22,7 @@
#ifndef ICING_FILE_FILE_BACKED_PROTO_H_
#define ICING_FILE_FILE_BACKED_PROTO_H_
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
@@ -37,6 +38,7 @@
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -63,6 +65,24 @@ class FileBackedProto {
  // file_path : Must be a path within a directory that already exists.
FileBackedProto(const Filesystem& filesystem, std::string_view file_path);
+ // Reset the internal file_path for the file backed proto.
+ // Example use:
+ // auto file_backed_proto1 = *FileBackedProto<Proto>::Create(...);
+  //   auto file_backed_proto2 = *FileBackedProto<Proto>::Create(...);
+ // filesystem.SwapFiles(file1, file2);
+ // file_backed_proto1.SetSwappedFilepath(file2);
+  //   file_backed_proto2.SetSwappedFilepath(file1);
+ void SetSwappedFilepath(std::string_view swapped_to_file_path) {
+ file_path_ = swapped_to_file_path;
+ }
+
+ // Computes the checksum of the proto stored in this file and returns it.
+ // RETURNS:
+ // - the checksum of the proto or 0 if the file is empty/non-existent
+ // - INTERNAL_ERROR if an IO error or a corruption was encountered.
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const
+ ICING_LOCKS_EXCLUDED(mutex_);
+
// Returns a reference to the proto read from the file. It
// internally caches the read proto so that future calls are fast.
//
@@ -83,7 +103,7 @@ class FileBackedProto {
//
// TODO(cassiewang) The implementation today loses old data if Write() fails.
// We should write to a tmp file first and rename the file to fix this.
- // TODO(samzheng) Change to Write(ProtoT&& proto)
+ // TODO(cassiewang) Change to Write(ProtoT&& proto)
libtextclassifier3::Status Write(std::unique_ptr<ProtoT> proto)
ICING_LOCKS_EXCLUDED(mutex_);
@@ -92,6 +112,11 @@ class FileBackedProto {
FileBackedProto& operator=(const FileBackedProto&) = delete;
private:
+ // Internal method to handle reading the proto from disk.
+ // Requires the caller to hold an exclusive lock on mutex_.
+ libtextclassifier3::StatusOr<const ProtoT*> ReadInternal() const
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Upper bound of file-size that is supported.
static constexpr int32_t kMaxFileSize = 1 * 1024 * 1024; // 1 MiB.
@@ -99,9 +124,11 @@ class FileBackedProto {
mutable absl_ports::shared_mutex mutex_;
const Filesystem* const filesystem_;
- const std::string file_path_;
+ std::string file_path_;
mutable std::unique_ptr<ProtoT> cached_proto_ ICING_GUARDED_BY(mutex_);
+
+ mutable std::unique_ptr<Header> cached_header_ ICING_GUARDED_BY(mutex_);
};
template <typename ProtoT>
@@ -113,12 +140,35 @@ FileBackedProto<ProtoT>::FileBackedProto(const Filesystem& filesystem,
: filesystem_(&filesystem), file_path_(file_path) {}
template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32> FileBackedProto<ProtoT>::ComputeChecksum()
+ const {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_proto_ == nullptr) {
+ auto read_status = ReadInternal();
+ if (!read_status.ok()) {
+ if (absl_ports::IsNotFound(read_status.status())) {
+ // File doesn't exist. So simply return 0.
+ return Crc32();
+ }
+ return read_status.status();
+ }
+ }
+ return Crc32(cached_header_->proto_checksum);
+}
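
// Example use (illustrative sketch, not part of the patch; assumes a
// FileBackedProto<DocumentProto> named file_proto in a function returning a
// Status):
//   ICING_ASSIGN_OR_RETURN(Crc32 checksum, file_proto.ComputeChecksum());
//   ICING_VLOG(1) << "Proto checksum: " << checksum.Get();
// A missing file yields Crc32() (i.e. 0) rather than an error; the header is
// cached on the first read, so repeated calls are cheap.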
+
+template <typename ProtoT>
libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
const {
ICING_VLOG(1) << "Reading proto from file: " << file_path_;
absl_ports::unique_lock l(&mutex_);
+ return ReadInternal();
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<const ProtoT*>
+FileBackedProto<ProtoT>::ReadInternal() const {
// Return cached proto if we've already read from disk.
if (cached_proto_ != nullptr) {
ICING_VLOG(1) << "Reusing cached proto for file: " << file_path_;
@@ -146,8 +196,7 @@ libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
<< " of size: " << file_size;
Header header;
- if (!filesystem_->PRead(fd.get(), &header, sizeof(Header),
- /*offset=*/0)) {
+ if (!filesystem_->PRead(fd.get(), &header, sizeof(Header), /*offset=*/0)) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to read header of: ", file_path_));
}
@@ -182,6 +231,7 @@ libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
ICING_VLOG(1) << "Successfully read proto from file: " << file_path_;
cached_proto_ = std::move(proto);
+ cached_header_ = std::make_unique<Header>(std::move(header));
return cached_proto_.get();
}
@@ -242,6 +292,7 @@ libtextclassifier3::Status FileBackedProto<ProtoT>::Write(
ICING_VLOG(1) << "Successfully wrote proto to file: " << file_path_;
cached_proto_ = std::move(new_proto);
+ cached_header_ = std::make_unique<Header>(std::move(header));
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/file-backed-proto_test.cc b/icing/file/file-backed-proto_test.cc
index 7f994fb..009af52 100644
--- a/icing/file/file-backed-proto_test.cc
+++ b/icing/file/file-backed-proto_test.cc
@@ -45,7 +45,7 @@ TEST_F(FileBackedProtoTest, SimpleReadWriteTest) {
DocumentBuilder().SetKey("namespace", "google.com").Build();
FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
- ICING_ASSERT_OK(file_proto.Write(absl::make_unique<DocumentProto>(document)));
+ ICING_ASSERT_OK(file_proto.Write(std::make_unique<DocumentProto>(document)));
EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
// Multiple reads work.
EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
@@ -61,7 +61,7 @@ TEST_F(FileBackedProtoTest, DataPersistsAcrossMultipleInstancesTest) {
EXPECT_THAT(file_proto.Read(), Not(IsOk())); // Nothing to read.
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(document)));
+ file_proto.Write(std::make_unique<DocumentProto>(document)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(document))));
}
@@ -84,12 +84,12 @@ TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) {
{
FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(googleProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(googleProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(googleProto))));
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(youtubeProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(youtubeProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(youtubeProto))));
}
@@ -100,12 +100,12 @@ TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) {
IsOkAndHolds(Pointee(EqualsProto(youtubeProto))));
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(wazeProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(wazeProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(wazeProto))));
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(googleProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(googleProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(googleProto))));
}
@@ -117,7 +117,7 @@ TEST_F(FileBackedProtoTest, InvalidFilenameTest) {
FileBackedProto<DocumentProto> file_proto(filesystem_, "");
EXPECT_THAT(file_proto.Read(), Not(IsOk()));
- EXPECT_THAT(file_proto.Write(absl::make_unique<DocumentProto>(document)),
+ EXPECT_THAT(file_proto.Write(std::make_unique<DocumentProto>(document)),
Not(IsOk()));
}
@@ -128,7 +128,7 @@ TEST_F(FileBackedProtoTest, FileCorruptionTest) {
{
FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(document)));
+ file_proto.Write(std::make_unique<DocumentProto>(document)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(document))));
}
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 27d03b2..7408e8b 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -56,10 +56,15 @@
#ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
#define ICING_FILE_FILE_BACKED_VECTOR_H_
-#include <stdint.h>
#include <sys/mman.h>
+#include <unistd.h>
+#include <algorithm>
+#include <cinttypes>
#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <limits>
#include <memory>
#include <string>
#include <utility>
@@ -72,6 +77,7 @@
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/platform.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/math-util.h"
@@ -83,6 +89,9 @@ namespace lib {
template <typename T>
class FileBackedVector {
public:
+ class MutableArrayView;
+ class MutableView;
+
// Header stored at the beginning of the file before the rest of the vector
// elements. Stores metadata on the vector.
struct Header {
@@ -133,15 +142,38 @@ class FileBackedVector {
kHeaderChecksumOffset,
"");
- Crc32 crc;
- std::string_view header_str(
- reinterpret_cast<const char*>(this),
- offsetof(FileBackedVector::Header, header_checksum));
- crc.Append(header_str);
- return crc.Get();
+ return Crc32(std::string_view(
+ reinterpret_cast<const char*>(this),
+ offsetof(FileBackedVector::Header, header_checksum)))
+ .Get();
}
};
+ // Absolute max file size for FileBackedVector.
+ // - We memory map the whole file, so file size ~= memory size.
+  // - On 32-bit platforms, the virtual memory address space is 4GB. To avoid
+  //   exhausting it, we set a smaller file size limit for 32-bit platforms.
+#ifdef ICING_ARCH_BIT_64
+ static constexpr int32_t kMaxFileSize =
+ std::numeric_limits<int32_t>::max(); // 2^31-1 Bytes, ~2.1 GB
+#else
+ static constexpr int32_t kMaxFileSize =
+ (1 << 28) + Header::kHeaderSize; // 2^28 + 12 Bytes, ~256 MiB
+#endif
+
+  // Size of element type T. The value is the same as sizeof(T), but we should
+  // avoid using sizeof(T) directly in our codebase to prevent unexpected
+  // unsigned integer casting.
+ static constexpr int32_t kElementTypeSize = static_cast<int32_t>(sizeof(T));
+ static_assert(sizeof(T) <= (1 << 10));
+
+  // Absolute max # of elements allowed. Since we are using int32_t to store
+  // num_elements, the max value is 2^31-1. Still, the actual max # of elements
+  // is determined by max_file_size, kMaxFileSize, kElementTypeSize, and
+  // Header::kHeaderSize.
+ static constexpr int32_t kMaxNumElements =
+ std::numeric_limits<int32_t>::max();
+
// Creates a new FileBackedVector to read/write content to.
//
// filesystem: Object to make system level calls
@@ -149,9 +181,44 @@ class FileBackedVector {
// within a directory that already exists.
// mmap_strategy : Strategy/optimizations to access the content in the vector,
// see MemoryMappedFile::Strategy for more details
+  // max_file_size: Maximum file size for FileBackedVector, default
+  //                kMaxFileSize. Note that this value won't be written into
+  //                the header, so the maximum file size is always specified
+  //                at runtime and the caller should make sure the value is
+  //                correct and reasonable. It is also cached in the
+  //                MemoryMappedFile member, so we can always call
+  //                mmapped_file_->max_file_size() to get it.
+  //                The range should be
+  //                [Header::kHeaderSize + kElementTypeSize, kMaxFileSize], and
+  //                (max_file_size - Header::kHeaderSize) / kElementTypeSize is
+  //                the max # of elements that can be stored.
+  // pre_mapping_mmap_size: pre-mapping size of MemoryMappedFile, default 0.
+  //                        Pre-mapping a large memory region for the file lets
+  //                        us grow the underlying file later without remapping
+  //                        too frequently, reducing the cost of system calls
+  //                        and memory paging after remap. The user should
+  //                        specify a reasonable size to save remapping cost
+  //                        without exhausting memory all at once up front.
+  //                        Note: if the file exists and pre_mapping_mmap_size
+  //                        is smaller than file_size - Header::kHeaderSize,
+  //                        then it still pre-maps file_size -
+  //                        Header::kHeaderSize to make all existing elements
+  //                        available.
+ // TODO(b/247671531): figure out pre_mapping_mmap_size for each
+ // FileBackedVector use case.
+ //
+ // Return:
+ // FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // INTERNAL_ERROR on I/O errors.
+ // INVALID_ARGUMENT_ERROR if max_file_size is incorrect.
+ // UNIMPLEMENTED_ERROR if created with strategy READ_WRITE_MANUAL_SYNC.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
Create(const Filesystem& filesystem, const std::string& file_path,
- MemoryMappedFile::Strategy mmap_strategy);
+ MemoryMappedFile::Strategy mmap_strategy,
+ int32_t max_file_size = kMaxFileSize,
+ int32_t pre_mapping_mmap_size = 0);
// Deletes the FileBackedVector
//
@@ -169,23 +236,144 @@ class FileBackedVector {
// synced by the system and the checksum will be updated.
~FileBackedVector();
- // Accesses the element at idx.
+ // Gets a copy of the element at idx.
+ //
+ // This is useful if you think the FileBackedVector may grow before you need
+ // to access this return value. When the FileBackedVector grows, the
+ // underlying mmap will be unmapped and remapped, which will invalidate any
+ // pointers to the previously mapped region. Getting a copy will avoid
+ // referencing the now-invalidated region.
//
// Returns:
- // OUT_OF_RANGE_ERROR if idx < 0 or > num_elements()
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx >= num_elements()
+ libtextclassifier3::StatusOr<T> GetCopy(int32_t idx) const;
+
+ // Gets an immutable pointer to the element at idx.
+ //
+ // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the pointer
+ // returned by Get.
+ //
+ // This is useful if you do not think the FileBackedVector will grow before
+ // you need to reference this value, and you want to avoid a copy. When the
+ // FileBackedVector grows, the underlying mmap will be unmapped and remapped,
+ // which will invalidate this pointer to the previously mapped region.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx >= num_elements()
libtextclassifier3::StatusOr<const T*> Get(int32_t idx) const;
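
  // Example use (illustrative sketch, not part of the original header; assumes
  // a FileBackedVector<int>* vec with at least 6 elements):
  //   ICING_ASSIGN_OR_RETURN(int copied, vec->GetCopy(/*idx=*/5));
  //   ICING_RETURN_IF_ERROR(vec->Append(42));  // May grow and remap the file.
  //   // `copied` stays valid across the remap; a pointer from Get() may not:
  //   ICING_ASSIGN_OR_RETURN(const int* ptr, vec->Get(/*idx=*/5));
  //   // *ptr is only safe to use before the next Set/Append/Allocate call.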
+ // Gets a MutableView to the element at idx.
+ //
+ // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the
+ // reference returned by MutableView::Get().
+ //
+ // This is useful if you do not think the FileBackedVector will grow before
+ // you need to reference this value, and you want to mutate the underlying
+ // data directly. When the FileBackedVector grows, the underlying mmap will be
+ // unmapped and remapped, which will invalidate this MutableView to the
+ // previously mapped region.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx >= num_elements()
+ libtextclassifier3::StatusOr<MutableView> GetMutable(int32_t idx);
+
+ // Gets a MutableArrayView to the elements at range [idx, idx + len).
+ //
+ // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the
+ // reference/pointer returned by MutableArrayView::operator[]/data().
+ //
+ // This is useful if you do not think the FileBackedVector will grow before
+ // you need to reference this value, and you want to mutate the underlying
+ // data directly. When the FileBackedVector grows, the underlying mmap will be
+ // unmapped and remapped, which will invalidate this MutableArrayView to the
+ // previously mapped region.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx + len > num_elements()
+ libtextclassifier3::StatusOr<MutableArrayView> GetMutable(int32_t idx,
+ int32_t len);
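
  // Example use (illustrative sketch; assumes a FileBackedVector<int>* vec
  // with at least 8 elements):
  //   ICING_ASSIGN_OR_RETURN(FileBackedVector<int>::MutableArrayView mav,
  //                          vec->GetMutable(/*idx=*/4, /*len=*/4));
  //   int new_values[] = {1, 2, 3, 4};
  //   mav.SetArray(/*idx=*/0, new_values, /*arr_len=*/4);
  // SetArray marks each touched index dirty, keeping the partial crc correct.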
+
// Writes the value at idx.
//
+ // May grow the underlying file and mmapped region as needed to fit the new
+ // value. If it does grow, then any pointers/references to previous values
+ // returned from Get/GetMutable/Allocate may be invalidated.
+ //
// Returns:
- // OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx > kMaxIndex or file cannot be grown
+ // to fit idx + 1 elements
libtextclassifier3::Status Set(int32_t idx, const T& value);
- // Resizes to first len elements. The crc is not updated on truncation.
+  // Sets [idx, idx + len) to a single value.
+ //
+ // May grow the underlying file and mmapped region as needed to fit the new
+ // value. If it does grow, then any pointers/references to previous values
+ // returned from Get/GetMutable/Allocate may be invalidated.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx + len > kMaxNumElements or file
+ // cannot be grown to fit idx + len elements
+ libtextclassifier3::Status Set(int32_t idx, int32_t len, const T& value);
+
+ // Appends the value to the end of the vector.
+ //
+ // May grow the underlying file and mmapped region as needed to fit the new
+ // value. If it does grow, then any pointers/references to previous values
+ // returned from Get/GetMutable/Allocate may be invalidated.
+ //
+ // Returns:
+  //   OUT_OF_RANGE_ERROR if the file cannot be grown (i.e. it has reached
+  //   mmapped_file_->max_file_size())
+ libtextclassifier3::Status Append(const T& value) {
+ return Set(header_->num_elements, value);
+ }
+
+  // Allocates space of the given length at the end of the vector and returns
+  // a MutableArrayView to it.
+ //
+ // May grow the underlying file and mmapped region as needed to fit the new
+ // value. If it does grow, then any pointers/references to previous values
+ // returned from Get/GetMutable/Allocate may be invalidated.
+ //
+ // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the
+ // reference/pointer returned by MutableArrayView::operator[]/data().
+ //
+ // This is useful if you do not think the FileBackedVector will grow before
+ // you need to reference this value, and you want to allocate adjacent spaces
+ // for multiple elements and mutate the underlying data directly. When the
+ // FileBackedVector grows, the underlying mmap will be unmapped and remapped,
+ // which will invalidate this MutableArrayView to the previously mapped
+ // region.
//
// Returns:
- // OUT_OF_RANGE_ERROR if len < 0 or >= num_elements()
- libtextclassifier3::Status TruncateTo(int32_t len);
+  //   OUT_OF_RANGE_ERROR if len <= 0 or the file cannot be grown (i.e. it has
+  //   reached mmapped_file_->max_file_size())
+ libtextclassifier3::StatusOr<MutableArrayView> Allocate(int32_t len);
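
  // Example use (illustrative sketch; assumes a FileBackedVector<char>* vec):
  //   ICING_ASSIGN_OR_RETURN(FileBackedVector<char>::MutableArrayView block,
  //                          vec->Allocate(/*len=*/3));
  //   block[0] = 'a';  // Non-const operator[] marks the index dirty first.
  //   block[1] = 'b';
  //   block[2] = 'c';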
+
+  // Resizes to the first new_num_elements elements. The crc is cleared on
+  // truncation and will be updated on destruction, or once the client calls
+  // ComputeChecksum() or PersistToDisk().
+  //
+  // Returns:
+  //   OUT_OF_RANGE_ERROR if new_num_elements < 0 or
+  //     new_num_elements >= num_elements()
+ libtextclassifier3::Status TruncateTo(int32_t new_num_elements);
+
+ // Sorts the vector within range [begin_idx, end_idx).
+ // It handles SetDirty properly for the file-backed-vector.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if (0 <= begin_idx < end_idx <= num_elements()) does
+ // not hold
+ libtextclassifier3::Status Sort(int32_t begin_idx, int32_t end_idx);
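
  // Example use (illustrative sketch; assumes a non-empty
  // FileBackedVector<int>* vec):
  //   ICING_RETURN_IF_ERROR(
  //       vec->Sort(/*begin_idx=*/0, /*end_idx=*/vec->num_elements()));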
+
+  // Marks idx as changed iff idx < changes_end_, so a later ComputeChecksum()
+  // can update the checksum from the cached changes without rescanning
+  // [0, changes_end_).
+  //
+  // If the buffer size exceeds the kPartialCrcLimitDiv threshold, then clear
+  // all change buffers and set changes_end_ to 0, indicating that the checksum
+  // should be recomputed from idx 0 (the beginning). Otherwise cache the
+  // change.
+ void SetDirty(int32_t idx);
// Flushes content to underlying file.
//
@@ -211,22 +399,78 @@ class FileBackedVector {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+ // Updates checksum of the vector contents and returns it.
+ //
+ // Returns:
+ // INTERNAL_ERROR if the vector's internal state is inconsistent
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
// Accessors.
const T* array() const {
return reinterpret_cast<const T*>(mmapped_file_->region());
}
- T* mutable_array() const {
- return reinterpret_cast<T*>(mmapped_file_->mutable_region());
- }
-
int32_t num_elements() const { return header_->num_elements; }
- // Updates checksum of the vector contents and returns it.
- //
- // Returns:
- // INTERNAL_ERROR if the vector's internal state is inconsistent
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+ public:
+ class MutableArrayView {
+ public:
+ const T& operator[](int32_t idx) const { return data_[idx]; }
+ T& operator[](int32_t idx) {
+ SetDirty(idx);
+ return data_[idx];
+ }
+
+ const T* data() const { return data_; }
+
+ int32_t size() const { return len_; }
+
+    // Sets the mutable array slice (starting at idx) from the given element
+    // array. It handles SetDirty properly for the file-backed-vector when
+    // modifying elements.
+ //
+ // REQUIRES: arr is valid && arr_len >= 0 && idx >= 0 && idx + arr_len <=
+ // size(), otherwise the behavior is undefined.
+ void SetArray(int32_t idx, const T* arr, int32_t arr_len) {
+ for (int32_t i = 0; i < arr_len; ++i) {
+ SetDirty(idx + i);
+ data_[idx + i] = arr[i];
+ }
+ }
+
+ private:
+ MutableArrayView(FileBackedVector<T>* vector, T* data, int32_t len)
+ : vector_(vector),
+ data_(data),
+ original_idx_(data - vector->array()),
+ len_(len) {}
+
+ void SetDirty(int32_t idx) { vector_->SetDirty(original_idx_ + idx); }
+
+ // Does not own. For SetDirty only.
+ FileBackedVector<T>* vector_;
+
+ // data_ points at vector_->mutable_array()[original_idx_]
+ T* data_;
+ int32_t original_idx_;
+ int32_t len_;
+
+ friend class FileBackedVector;
+ };
+
+ class MutableView {
+ public:
+ const T& Get() const { return mutable_array_view_[0]; }
+ T& Get() { return mutable_array_view_[0]; }
+
+ private:
+ MutableView(FileBackedVector<T>* vector, T* data)
+ : mutable_array_view_(vector, data, 1) {}
+
+ MutableArrayView mutable_array_view_;
+
+ friend class FileBackedVector;
+ };
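
  // Example use of MutableView (illustrative sketch; assumes a
  // FileBackedVector<int>* vec with at least 1 element):
  //   ICING_ASSIGN_OR_RETURN(FileBackedVector<int>::MutableView mutable_view,
  //                          vec->GetMutable(/*idx=*/0));
  //   mutable_view.Get() = 99;  // Marks index 0 dirty before writing.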
private:
// We track partial updates to the array for crc updating. This
@@ -239,24 +483,27 @@ class FileBackedVector {
// Grow file by at least this many elements if array is growable.
static constexpr int64_t kGrowElements = 1u << 14; // 16K
- // Max number of elements that can be held by the vector.
- static constexpr int64_t kMaxNumElements = 1u << 20; // 1M
+ // Absolute max index allowed.
+ static constexpr int32_t kMaxIndex = kMaxNumElements - 1;
// Can only be created through the factory ::Create function
- FileBackedVector(const Filesystem& filesystem, const std::string& file_path,
- std::unique_ptr<Header> header,
- std::unique_ptr<MemoryMappedFile> mmapped_file);
+ explicit FileBackedVector(const Filesystem& filesystem,
+ const std::string& file_path,
+ std::unique_ptr<Header> header,
+ MemoryMappedFile&& mmapped_file);
// Initialize a new FileBackedVector, and create the file.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
InitializeNewFile(const Filesystem& filesystem, const std::string& file_path,
- ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy);
+ ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy,
+ int32_t max_file_size, int32_t pre_mapping_mmap_size);
// Initialize a FileBackedVector from an existing file.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
InitializeExistingFile(const Filesystem& filesystem,
const std::string& file_path, ScopedFd fd,
- MemoryMappedFile::Strategy mmap_strategy);
+ MemoryMappedFile::Strategy mmap_strategy,
+ int32_t max_file_size, int32_t pre_mapping_mmap_size);
// Grows the underlying file to hold at least num_elements
//
@@ -264,6 +511,10 @@ class FileBackedVector {
// OUT_OF_RANGE_ERROR if we can't grow to the specified size
libtextclassifier3::Status GrowIfNecessary(int32_t num_elements);
+ T* mutable_array() const {
+ return reinterpret_cast<T*>(mmapped_file_->mutable_region());
+ }
+
// Cached constructor params.
const Filesystem* const filesystem_;
const std::string file_path_;
@@ -281,26 +532,33 @@ class FileBackedVector {
// Buffer of the original elements that have been changed since the last crc
// update. Will be cleared if the size grows too big.
std::string saved_original_buffer_;
-
- // Keep track of all pages we touched so we can write them back to
- // disk.
- std::vector<bool> dirty_pages_;
};
template <typename T>
+constexpr int32_t FileBackedVector<T>::kMaxFileSize;
+
+template <typename T>
+constexpr int32_t FileBackedVector<T>::kElementTypeSize;
+
+template <typename T>
+constexpr int32_t FileBackedVector<T>::kMaxNumElements;
+
+template <typename T>
constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
template <typename T>
constexpr int64_t FileBackedVector<T>::kGrowElements;
template <typename T>
-constexpr int64_t FileBackedVector<T>::kMaxNumElements;
+constexpr int32_t FileBackedVector<T>::kMaxIndex;
template <typename T>
libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
FileBackedVector<T>::Create(const Filesystem& filesystem,
const std::string& file_path,
- MemoryMappedFile::Strategy mmap_strategy) {
+ MemoryMappedFile::Strategy mmap_strategy,
+ int32_t max_file_size,
+ int32_t pre_mapping_mmap_size) {
if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
// FileBackedVector's behavior of growing the file underneath the mmap is
// inherently broken with MAP_PRIVATE. Growing the vector requires extending
@@ -313,6 +571,14 @@ FileBackedVector<T>::Create(const Filesystem& filesystem,
"mmap strategy.");
}
+ if (max_file_size < Header::kHeaderSize + kElementTypeSize ||
+ max_file_size > kMaxFileSize) {
+ // FileBackedVector should be able to store at least 1 element, so
+ // max_file_size should be at least Header::kHeaderSize + kElementTypeSize.
+ return absl_ports::InvalidArgumentError(
+ "Invalid max file size for FileBackedVector");
+ }
+
ScopedFd fd(filesystem.OpenForWrite(file_path.c_str()));
if (!fd.is_valid()) {
return absl_ports::InternalError(
@@ -325,41 +591,56 @@ FileBackedVector<T>::Create(const Filesystem& filesystem,
absl_ports::StrCat("Bad file size for file ", file_path));
}
+ if (max_file_size < file_size) {
+ return absl_ports::InvalidArgumentError(
+ "Max file size should not be smaller than the existing file size");
+ }
+
const bool new_file = file_size == 0;
if (new_file) {
return InitializeNewFile(filesystem, file_path, std::move(fd),
- mmap_strategy);
+ mmap_strategy, max_file_size,
+ pre_mapping_mmap_size);
}
return InitializeExistingFile(filesystem, file_path, std::move(fd),
- mmap_strategy);
+ mmap_strategy, max_file_size,
+ pre_mapping_mmap_size);
}
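
// Example use of Create with the new parameters (illustrative sketch; the
// sizes are arbitrary and assume a Filesystem `filesystem` and a
// std::string `file_path`):
//   ICING_ASSERT_OK_AND_ASSIGN(
//       std::unique_ptr<FileBackedVector<int>> vec,
//       FileBackedVector<int>::Create(
//           filesystem, file_path,
//           MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
//           /*max_file_size=*/1 << 20, /*pre_mapping_mmap_size=*/1 << 16));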
template <typename T>
libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
-FileBackedVector<T>::InitializeNewFile(
- const Filesystem& filesystem, const std::string& file_path, ScopedFd fd,
- MemoryMappedFile::Strategy mmap_strategy) {
+FileBackedVector<T>::InitializeNewFile(const Filesystem& filesystem,
+ const std::string& file_path,
+ ScopedFd fd,
+ MemoryMappedFile::Strategy mmap_strategy,
+ int32_t max_file_size,
+ int32_t pre_mapping_mmap_size) {
// Create header.
auto header = std::make_unique<Header>();
header->magic = FileBackedVector<T>::Header::kMagic;
- header->element_size = sizeof(T);
+ header->element_size = kElementTypeSize;
header->header_checksum = header->CalculateHeaderChecksum();
// We use Write() here, instead of writing through the mmapped region
// created below, so we can gracefully handle errors that occur when the
// disk is full. See b/77309668 for details.
if (!filesystem.PWrite(fd.get(), /*offset=*/0, header.get(),
- sizeof(Header))) {
+ Header::kHeaderSize)) {
return absl_ports::InternalError("Failed to write header");
}
- // Constructor of MemoryMappedFile doesn't actually call mmap(), mmap()
- // happens on MemoryMappedFile::Remap(). So having a potentially unflushed fd
- // at this point shouldn't run into issues with a mmap of the same file. But
- // we'll close the fd just in case.
+  // Close the fd since the constructor of MemoryMappedFile calls mmap(), and
+  // we need to flush the fd before mmap().
fd.reset();
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem, file_path, mmap_strategy,
+ max_file_size,
+ /*pre_mapping_file_offset=*/Header::kHeaderSize,
+ /*pre_mapping_mmap_size=*/
+ std::min(max_file_size - Header::kHeaderSize,
+ pre_mapping_mmap_size)));
return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>(
filesystem, file_path, std::move(header), std::move(mmapped_file)));
@@ -369,15 +650,21 @@ template <typename T>
libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
FileBackedVector<T>::InitializeExistingFile(
const Filesystem& filesystem, const std::string& file_path,
- const ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy) {
+ const ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy,
+ int32_t max_file_size, int32_t pre_mapping_mmap_size) {
int64_t file_size = filesystem.GetFileSize(file_path.c_str());
- if (file_size < sizeof(FileBackedVector<T>::Header)) {
+ if (file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Bad file size for file ", file_path));
+ }
+
+ if (file_size < Header::kHeaderSize) {
return absl_ports::InternalError(
absl_ports::StrCat("File header too short for ", file_path));
}
auto header = std::make_unique<Header>();
- if (!filesystem.PRead(fd.get(), header.get(), sizeof(Header),
+ if (!filesystem.PRead(fd.get(), header.get(), Header::kHeaderSize,
/*offset=*/0)) {
return absl_ports::InternalError(
absl_ports::StrCat("Failed to read header of ", file_path));
@@ -391,34 +678,50 @@ FileBackedVector<T>::InitializeExistingFile(
absl_ports::StrCat("Invalid header kMagic for ", file_path));
}
- // Mmap the content of the vector, excluding the header so its easier to
- // access elements from the mmapped region
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
- ICING_RETURN_IF_ERROR(
- mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
-
// Check header
if (header->header_checksum != header->CalculateHeaderChecksum()) {
- return absl_ports::InternalError(
+ return absl_ports::FailedPreconditionError(
absl_ports::StrCat("Invalid header crc for ", file_path));
}
- if (header->element_size != sizeof(T)) {
+ if (header->element_size != kElementTypeSize) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
- "Inconsistent element size, expected %zd, actual %d", sizeof(T),
+ "Inconsistent element size, expected %d, actual %d", kElementTypeSize,
header->element_size));
}
+ int64_t min_file_size =
+ static_cast<int64_t>(header->num_elements) * kElementTypeSize +
+ Header::kHeaderSize;
+ if (min_file_size > file_size) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent file size, expected %" PRId64 ", actual %" PRId64,
+ min_file_size, file_size));
+ }
+
+  // Mmap the content of the vector, excluding the header so it's easier to
+  // access elements from the mmapped region.
+ // Although users can specify their own pre_mapping_mmap_size, we should make
+ // sure that the pre-map size is at least file_size - Header::kHeaderSize to
+ // make all existing elements available.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(
+ filesystem, file_path, mmap_strategy, max_file_size,
+ /*pre_mapping_file_offset=*/Header::kHeaderSize,
+ /*pre_mapping_mmap_size=*/
+ std::max(
+ file_size - Header::kHeaderSize,
+ static_cast<int64_t>(std::min(max_file_size - Header::kHeaderSize,
+ pre_mapping_mmap_size)))));
+
// Check vector contents
- Crc32 vector_checksum;
- std::string_view vector_contents(
- reinterpret_cast<const char*>(mmapped_file->region()),
- header->num_elements * sizeof(T));
- vector_checksum.Append(vector_contents);
+ Crc32 vector_checksum(
+ std::string_view(reinterpret_cast<const char*>(mmapped_file.region()),
+ header->num_elements * kElementTypeSize));
if (vector_checksum.Get() != header->vector_checksum) {
- return absl_ports::InternalError(
+ return absl_ports::FailedPreconditionError(
absl_ports::StrCat("Invalid vector contents for ", file_path));
}
@@ -437,14 +740,15 @@ libtextclassifier3::Status FileBackedVector<T>::Delete(
}
template <typename T>
-FileBackedVector<T>::FileBackedVector(
- const Filesystem& filesystem, const std::string& file_path,
- std::unique_ptr<Header> header,
- std::unique_ptr<MemoryMappedFile> mmapped_file)
+FileBackedVector<T>::FileBackedVector(const Filesystem& filesystem,
+ const std::string& file_path,
+ std::unique_ptr<Header> header,
+ MemoryMappedFile&& mmapped_file)
: filesystem_(&filesystem),
file_path_(file_path),
header_(std::move(header)),
- mmapped_file_(std::move(mmapped_file)),
+ mmapped_file_(
+ std::make_unique<MemoryMappedFile>(std::move(mmapped_file))),
changes_end_(header_->num_elements) {}
template <typename T>
@@ -460,6 +764,13 @@ FileBackedVector<T>::~FileBackedVector() {
}
template <typename T>
+libtextclassifier3::StatusOr<T> FileBackedVector<T>::GetCopy(
+ int32_t idx) const {
+ ICING_ASSIGN_OR_RETURN(const T* value, Get(idx));
+ return *value;
+}
+
+template <typename T>
libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get(
int32_t idx) const {
if (idx < 0) {
@@ -477,54 +788,111 @@ libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get(
}
template <typename T>
+libtextclassifier3::StatusOr<typename FileBackedVector<T>::MutableView>
+FileBackedVector<T>::GetMutable(int32_t idx) {
+ if (idx < 0) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
+ }
+
+ if (idx >= header_->num_elements) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Index, %d, was greater than vector size, %d", idx,
+ header_->num_elements));
+ }
+
+ return MutableView(this, &mutable_array()[idx]);
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<typename FileBackedVector<T>::MutableArrayView>
+FileBackedVector<T>::GetMutable(int32_t idx, int32_t len) {
+ if (idx < 0) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
+ }
+
+ if (idx > header_->num_elements - len) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Index with len, %d %d, was greater than vector size, %d", idx, len,
+ header_->num_elements));
+ }
+
+ return MutableArrayView(this, &mutable_array()[idx], len);
+}
+
+template <typename T>
libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx,
const T& value) {
+ return Set(idx, 1, value);
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, int32_t len,
+ const T& value) {
if (idx < 0) {
return absl_ports::OutOfRangeError(
IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
}
- int32_t start_byte = idx * sizeof(T);
-
- ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1));
+ if (len <= 0) {
+ return absl_ports::OutOfRangeError("Invalid set length");
+ }
- if (idx + 1 > header_->num_elements) {
- header_->num_elements = idx + 1;
+ if (idx > kMaxNumElements - len) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Length %d (with index %d), was too long "
+ "for max num elements allowed, %d",
+ len, idx, kMaxNumElements));
}
- if (mutable_array()[idx] == value) {
- // No need to update
- return libtextclassifier3::Status::OK;
+ ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + len));
+
+ if (idx + len > header_->num_elements) {
+ header_->num_elements = idx + len;
}
- // Cache original value to update crcs.
- if (idx < changes_end_) {
- // If we exceed kPartialCrcLimitDiv, clear changes_end_ to
- // revert to full CRC.
- if ((saved_original_buffer_.size() + sizeof(T)) *
- FileBackedVector<T>::kPartialCrcLimitDiv >
- changes_end_ * sizeof(T)) {
- ICING_VLOG(2) << "FileBackedVector change tracking limit exceeded";
- changes_.clear();
- saved_original_buffer_.clear();
- changes_end_ = 0;
- header_->vector_checksum = 0;
- } else {
- changes_.push_back(idx);
- saved_original_buffer_.append(
- reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte,
- sizeof(T));
+ for (int32_t i = 0; i < len; ++i) {
+ if (array()[idx + i] == value) {
+ // No need to update
+ continue;
}
+
+ SetDirty(idx + i);
+ mutable_array()[idx + i] = value;
}
- mutable_array()[idx] = value;
return libtextclassifier3::Status::OK;
}
template <typename T>
+libtextclassifier3::StatusOr<typename FileBackedVector<T>::MutableArrayView>
+FileBackedVector<T>::Allocate(int32_t len) {
+ if (len <= 0) {
+ return absl_ports::OutOfRangeError("Invalid allocate length");
+ }
+
+ if (len > kMaxNumElements - header_->num_elements) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Cannot allocate %d elements", len));
+ }
+
+ // Although header_->num_elements + len doesn't exceed kMaxNumElements, the
+  // actual max # of elements is determined by mmapped_file_->max_file_size(),
+ // kElementTypeSize, and kHeaderSize. Thus, it is still possible to fail to
+ // grow the file.
+ ICING_RETURN_IF_ERROR(GrowIfNecessary(header_->num_elements + len));
+
+ int32_t start_idx = header_->num_elements;
+ header_->num_elements += len;
+
+ return MutableArrayView(this, &mutable_array()[start_idx], len);
+}
+
+template <typename T>
libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
int32_t num_elements) {
- if (sizeof(T) == 0) {
+ if (kElementTypeSize == 0) {
// Growing is a no-op
return libtextclassifier3::Status::OK;
}
@@ -533,32 +901,35 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
return libtextclassifier3::Status::OK;
}
- if (num_elements > FileBackedVector<T>::kMaxNumElements) {
+ if (num_elements > (mmapped_file_->max_file_size() - Header::kHeaderSize) /
+ kElementTypeSize) {
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "%d exceeds maximum number of elements allowed, %lld", num_elements,
- static_cast<long long>(FileBackedVector<T>::kMaxNumElements)));
+        "%d elements' total size exceeds the maximum bytes of elements "
+ "%" PRId64 " bytes",
+ num_elements, mmapped_file_->max_file_size() - Header::kHeaderSize));
}
- int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str());
- int64_t least_file_size_needed = sizeof(Header) + num_elements * sizeof(T);
-
- if (least_file_size_needed <= current_file_size) {
- // Our underlying file can hold the target num_elements cause we've grown
- // before
+ int32_t least_file_size_needed =
+ Header::kHeaderSize + num_elements * kElementTypeSize; // Won't overflow
+ if (least_file_size_needed <= mmapped_file_->available_size()) {
return libtextclassifier3::Status::OK;
}
- // Otherwise, we need to grow. Grow to kGrowElements boundary.
- least_file_size_needed = math_util::RoundUpTo(
- least_file_size_needed,
- int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)});
- if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't grow file ", file_path_));
- }
-
- ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
- sizeof(Header), least_file_size_needed - sizeof(Header)));
+ int64_t round_up_file_size_needed = math_util::RoundUpTo(
+ int64_t{least_file_size_needed},
+ int64_t{FileBackedVector<T>::kGrowElements} * kElementTypeSize);
+
+ // Call GrowAndRemapIfNecessary. It handles file growth internally and remaps
+ // intelligently.
+ // We've ensured that least_file_size_needed (for num_elements) doesn't exceed
+ // mmapped_file_->max_file_size(), but it is still possible that
+  // round_up_file_size_needed exceeds it, so use the smaller of the two as
+  // new_mmap_size.
+ ICING_RETURN_IF_ERROR(mmapped_file_->GrowAndRemapIfNecessary(
+ /*new_file_offset=*/Header::kHeaderSize,
+ /*new_mmap_size=*/std::min(round_up_file_size_needed,
+ mmapped_file_->max_file_size()) -
+ Header::kHeaderSize));
return libtextclassifier3::Status::OK;
}
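
// Worked example of the growth arithmetic above (illustrative): with T = int
// (kElementTypeSize = 4), Header::kHeaderSize = 12, and kGrowElements = 2^14,
// growing to 100 elements needs 12 + 100 * 4 = 412 bytes, which RoundUpTo
// rounds to 16384 * 4 = 65536 bytes; the new mmap then covers
// min(65536, max_file_size()) - 12 bytes past the header.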
@@ -577,11 +948,58 @@ libtextclassifier3::Status FileBackedVector<T>::TruncateTo(
new_num_elements, header_->num_elements));
}
+ ICING_VLOG(2)
+ << "FileBackedVector truncating, need to recalculate entire checksum";
+ changes_.clear();
+ saved_original_buffer_.clear();
+ changes_end_ = 0;
+ header_->vector_checksum = 0;
+
header_->num_elements = new_num_elements;
return libtextclassifier3::Status::OK;
}
template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Sort(int32_t begin_idx,
+ int32_t end_idx) {
+ if (begin_idx < 0 || begin_idx >= end_idx ||
+ end_idx > header_->num_elements) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid sort index, %d, %d", begin_idx, end_idx));
+ }
+ for (int32_t i = begin_idx; i < end_idx; ++i) {
+ SetDirty(i);
+ }
+ std::sort(mutable_array() + begin_idx, mutable_array() + end_idx);
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+void FileBackedVector<T>::SetDirty(int32_t idx) {
+ // Cache original value to update crcs.
+ if (idx >= 0 && idx < changes_end_) {
+ // If we exceed kPartialCrcLimitDiv, clear changes_end_ to
+ // revert to full CRC.
+ if ((saved_original_buffer_.size() + kElementTypeSize) *
+ FileBackedVector<T>::kPartialCrcLimitDiv >
+ changes_end_ * kElementTypeSize) {
+ ICING_VLOG(2) << "FileBackedVector change tracking limit exceeded";
+ changes_.clear();
+ saved_original_buffer_.clear();
+ changes_end_ = 0;
+ header_->vector_checksum = 0;
+ } else {
+ int32_t start_byte = idx * kElementTypeSize;
+
+ changes_.push_back(idx);
+ saved_original_buffer_.append(
+ reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte,
+ kElementTypeSize);
+ }
+ }
+}
+
+template <typename T>
libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
// First apply the modified area. Keep a bitmap of already updated
// regions so we don't double-update.
@@ -592,8 +1010,7 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
int num_truncated = 0;
int num_overlapped = 0;
int num_duplicate = 0;
- for (size_t i = 0; i < changes_.size(); i++) {
- const int32_t change_offset = changes_[i];
+ for (const int32_t change_offset : changes_) {
if (change_offset > changes_end_) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
"Failed to update crc, change offset %d, changes_end_ %d",
@@ -607,9 +1024,10 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
}
// Turn change buffer into change^original.
- const char* buffer_end = &saved_original_buffer_[cur_offset + sizeof(T)];
- const char* cur_array =
- reinterpret_cast<const char*>(array()) + change_offset * sizeof(T);
+ const char* buffer_end =
+ &saved_original_buffer_[cur_offset + kElementTypeSize];
+ const char* cur_array = reinterpret_cast<const char*>(array()) +
+ change_offset * kElementTypeSize;
// Now xor in. SSE acceleration please?
for (char* cur = &saved_original_buffer_[cur_offset]; cur < buffer_end;
cur++, cur_array++) {
@@ -621,9 +1039,9 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
bool overlap = false;
uint32_t cur_element = change_offset;
for (char* cur = &saved_original_buffer_[cur_offset]; cur < buffer_end;
- cur_element++, cur += sizeof(T)) {
+ cur_element++, cur += kElementTypeSize) {
if (updated[cur_element]) {
- memset(cur, 0, sizeof(T));
+ memset(cur, 0, kElementTypeSize);
overlap = true;
} else {
updated[cur_element] = true;
@@ -634,10 +1052,11 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
// Apply update to crc.
if (new_update) {
// Explicitly create the string_view with length
- std::string_view xored_str(buffer_end - sizeof(T), sizeof(T));
+ std::string_view xored_str(buffer_end - kElementTypeSize,
+ kElementTypeSize);
if (!cur_crc
- .UpdateWithXor(xored_str, changes_end_ * sizeof(T),
- change_offset * sizeof(T))
+ .UpdateWithXor(xored_str, changes_end_ * kElementTypeSize,
+ change_offset * kElementTypeSize)
.ok()) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
"Failed to update crc, change offset %d, change "
@@ -651,8 +1070,9 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
} else {
num_duplicate++;
}
- cur_offset += sizeof(T);
+ cur_offset += kElementTypeSize;
}
+
if (!changes_.empty()) {
ICING_VLOG(2) << IcingStringUtil::StringPrintf(
"Array update partial crcs %d truncated %d overlapped %d duplicate %d",
@@ -663,8 +1083,9 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
if (changes_end_ < header_->num_elements) {
// Explicitly create the string_view with length
std::string_view update_str(
- reinterpret_cast<const char*>(array()) + changes_end_ * sizeof(T),
- (header_->num_elements - changes_end_) * sizeof(T));
+ reinterpret_cast<const char*>(array()) +
+ changes_end_ * kElementTypeSize,
+ (header_->num_elements - changes_end_) * kElementTypeSize);
cur_crc.Append(update_str);
ICING_VLOG(2) << IcingStringUtil::StringPrintf(
"Array update tail crc offset %d -> %d", changes_end_,
@@ -689,7 +1110,7 @@ libtextclassifier3::Status FileBackedVector<T>::PersistToDisk() {
header_->header_checksum = header_->CalculateHeaderChecksum();
if (!filesystem_->PWrite(file_path_.c_str(), /*offset=*/0, header_.get(),
- sizeof(Header))) {
+ Header::kHeaderSize)) {
return absl_ports::InternalError("Failed to sync header");
}
@@ -723,7 +1144,11 @@ libtextclassifier3::StatusOr<int64_t> FileBackedVector<T>::GetElementsFileSize()
return absl_ports::InternalError(
"Failed to get file size of elements in the file-backed vector");
}
- return total_file_size - sizeof(Header);
+ if (total_file_size < Header::kHeaderSize) {
+ return absl_ports::InternalError(
+ "File size should not be smaller than header size");
+ }
+ return total_file_size - Header::kHeaderSize;
}
} // namespace lib
diff --git a/icing/file/file-backed-vector_benchmark.cc b/icing/file/file-backed-vector_benchmark.cc
new file mode 100644
index 0000000..0447e93
--- /dev/null
+++ b/icing/file/file-backed-vector_benchmark.cc
@@ -0,0 +1,158 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits>
+#include <memory>
+#include <random>
+#include <string>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class FileBackedVectorBenchmark {
+ public:
+ explicit FileBackedVectorBenchmark()
+ : base_dir_(GetTestTempDir() + "/file_backed_vector_benchmark"),
+ file_path_(base_dir_ + "/test_vector"),
+ ddir_(&filesystem_, base_dir_),
+ random_engine_(/*seed=*/12345) {}
+
+ const Filesystem& filesystem() const { return filesystem_; }
+ const std::string& file_path() const { return file_path_; }
+ std::default_random_engine& random_engine() { return random_engine_; }
+
+ private:
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string file_path_;
+ DestructibleDirectory ddir_;
+
+ std::default_random_engine random_engine_;
+};
+
+// Benchmark Set() without extending the vector, i.e. the index should be in
+// the range [0, num_elts - 1].
+void BM_Set(benchmark::State& state) {
+ int num_elts = state.range(0);
+
+ FileBackedVectorBenchmark fbv_benchmark;
+
+ fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> fbv,
+ FileBackedVector<int>::Create(
+ fbv_benchmark.filesystem(), fbv_benchmark.file_path(),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Extend to num_elts
+ ICING_ASSERT_OK(fbv->Set(num_elts - 1, 0));
+
+ std::uniform_int_distribution<> distrib(0, num_elts - 1);
+ for (auto _ : state) {
+ int idx = distrib(fbv_benchmark.random_engine());
+ ICING_ASSERT_OK(fbv->Set(idx, idx));
+ }
+}
+BENCHMARK(BM_Set)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+// Benchmark a single Append(), equivalent to Set(fbv->num_elements(), val),
+// which extends the vector every iteration.
+void BM_Append(benchmark::State& state) {
+ FileBackedVectorBenchmark fbv_benchmark;
+
+ fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> fbv,
+ FileBackedVector<int>::Create(
+ fbv_benchmark.filesystem(), fbv_benchmark.file_path(),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ std::uniform_int_distribution<> distrib(0, std::numeric_limits<int>::max());
+ for (auto _ : state) {
+ ICING_ASSERT_OK(fbv->Append(distrib(fbv_benchmark.random_engine())));
+ }
+}
+BENCHMARK(BM_Append);
+
+// Benchmark appending many elements.
+void BM_AppendMany(benchmark::State& state) {
+ int num = state.range(0);
+
+ FileBackedVectorBenchmark fbv_benchmark;
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> fbv,
+ FileBackedVector<int>::Create(
+ fbv_benchmark.filesystem(), fbv_benchmark.file_path(),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ state.ResumeTiming();
+
+ for (int i = 0; i < num; ++i) {
+ ICING_ASSERT_OK(fbv->Append(i));
+ }
+
+    // Since the destructor calls PersistToDisk, we reset the unique pointer
+    // to invoke the destructor instead of calling PersistToDisk explicitly,
+    // so PersistToDisk is called only once.
+ fbv.reset();
+ }
+}
+BENCHMARK(BM_AppendMany)
+ ->Arg(1 << 5)
+ ->Arg(1 << 6)
+ ->Arg(1 << 7)
+ ->Arg(1 << 8)
+ ->Arg(1 << 9)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index 7561b57..524bbc1 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -14,25 +14,35 @@
#include "icing/file/file-backed-vector.h"
-#include <errno.h>
+#include <unistd.h>
#include <algorithm>
+#include <cerrno>
#include <cstdint>
+#include <limits>
#include <memory>
+#include <string>
#include <string_view>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/mock-filesystem.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
+using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Not;
using ::testing::Pointee;
+using ::testing::SizeIs;
namespace icing {
namespace lib {
@@ -55,24 +65,36 @@ class FileBackedVectorTest : public testing::Test {
// Helper method to loop over some data and insert into the vector at some idx
template <typename T>
- void Insert(FileBackedVector<T>* vector, int32_t idx, std::string data) {
- for (int i = 0; i < data.length(); ++i) {
+ void Insert(FileBackedVector<T>* vector, int32_t idx,
+ const std::vector<T>& data) {
+ for (int i = 0; i < data.size(); ++i) {
ICING_ASSERT_OK(vector->Set(idx + i, data.at(i)));
}
}
+ void Insert(FileBackedVector<char>* vector, int32_t idx, std::string data) {
+ Insert(vector, idx, std::vector<char>(data.begin(), data.end()));
+ }
+
// Helper method to retrieve data from the beginning of the vector
template <typename T>
- std::string_view Get(FileBackedVector<T>* vector, int32_t expected_len) {
+ std::vector<T> Get(FileBackedVector<T>* vector, int32_t idx,
+ int32_t expected_len) {
+ return std::vector<T>(vector->array() + idx,
+ vector->array() + idx + expected_len);
+ }
+
+ std::string_view Get(FileBackedVector<char>* vector, int32_t expected_len) {
return Get(vector, 0, expected_len);
}
- template <typename T>
- std::string_view Get(FileBackedVector<T>* vector, int32_t idx,
+ std::string_view Get(FileBackedVector<char>* vector, int32_t idx,
int32_t expected_len) {
return std::string_view(vector->array() + idx, expected_len);
}
+ const Filesystem& filesystem() const { return filesystem_; }
+
Filesystem filesystem_;
std::string file_path_;
int fd_;
@@ -96,6 +118,79 @@ TEST_F(FileBackedVectorTest, Create) {
}
}
+TEST_F(FileBackedVectorTest, CreateWithInvalidStrategy) {
+ // Create a vector with unimplemented strategy
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+TEST_F(FileBackedVectorTest, CreateWithCustomMaxFileSize) {
+ int32_t header_size = FileBackedVector<char>::Header::kHeaderSize;
+
+ // Create a vector with invalid max_file_size
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/header_size - 1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/header_size + sizeof(char) - 1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ {
+ // Create a vector with max_file_size that allows only 1 element.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto vector, FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/header_size + sizeof(char) * 1));
+ ICING_ASSERT_OK(vector->Set(0, 'a'));
+ }
+
+ {
+ // We can create it again with larger max_file_size, as long as it is not
+ // greater than kMaxFileSize.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto vector, FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/header_size + sizeof(char) * 2));
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
+ ICING_ASSERT_OK(vector->Set(1, 'b'));
+ }
+
+ // We cannot create it again with max_file_size < current_file_size, even if
+ // it is a valid value.
+ int64_t current_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(current_file_size, Eq(header_size + sizeof(char) * 2));
+ ASSERT_THAT(current_file_size - 1, Not(Lt(header_size + sizeof(char))));
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/current_file_size - 1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ {
+ // We can create it again with max_file_size == current_file_size.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto vector, FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/current_file_size));
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(Eq('b'))));
+ }
+}
+
TEST_F(FileBackedVectorTest, SimpleShared) {
// Create a vector and add some data.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -132,7 +227,7 @@ TEST_F(FileBackedVectorTest, SimpleShared) {
ASSERT_THAT(FileBackedVector<char>::Create(
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
// Get it back into an ok state
filesystem_.PWrite(file_path_.data(),
@@ -158,8 +253,8 @@ TEST_F(FileBackedVectorTest, SimpleShared) {
// Truncate the content
ICING_EXPECT_OK(vector->TruncateTo(0));
- // We don't automatically update the crc when we truncate.
- EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));
+ // Crc is cleared after truncation and reset to 0.
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
EXPECT_EQ(0u, vector->num_elements());
}
@@ -188,6 +283,432 @@ TEST_F(FileBackedVectorTest, Get) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
+TEST_F(FileBackedVectorTest, SetWithoutGrowing) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+ std::string original = "abcde";
+ Insert(vector.get(), /*idx=*/0, original);
+ ASSERT_THAT(vector->num_elements(), Eq(original.length()));
+ ASSERT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq(original));
+
+ ICING_EXPECT_OK(vector->Set(/*idx=*/1, /*len=*/3, 'z'));
+ EXPECT_THAT(vector->num_elements(), Eq(5));
+ EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq("azzze"));
+}
+
+TEST_F(FileBackedVectorTest, SetWithGrowing) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+ std::string original = "abcde";
+ Insert(vector.get(), /*idx=*/0, original);
+ ASSERT_THAT(vector->num_elements(), Eq(original.length()));
+ ASSERT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq(original));
+
+ ICING_EXPECT_OK(vector->Set(/*idx=*/3, /*len=*/4, 'z'));
+ EXPECT_THAT(vector->num_elements(), Eq(7));
+ EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/7), Eq("abczzzz"));
+}
+
+TEST_F(FileBackedVectorTest, SetInvalidArguments) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(vector->Set(/*idx=*/0, /*len=*/-1, 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(/*idx=*/0, /*len=*/0, 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(/*idx=*/-1, /*len=*/2, 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(/*idx=*/100,
+ /*len=*/std::numeric_limits<int32_t>::max(), 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedVectorTest, MutableView) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::string(1000, 'a'));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2620640643U)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableView mutable_elt,
+ vector->GetMutable(3));
+
+ mutable_elt.Get() = 'b';
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(Eq('b'))));
+
+ mutable_elt.Get() = 'c';
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(Eq('c'))));
+}
+
+TEST_F(FileBackedVectorTest, MutableViewShouldSetDirty) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::string(1000, 'a'));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2620640643U)));
+
+ std::string_view reconstructed_view =
+ std::string_view(vector->array(), vector->num_elements());
+
+ ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableView mutable_elt,
+ vector->GetMutable(3));
+
+  // Mutate the element via MutableView.
+  // If non-const Get() is called, MutableView should mark the element index
+  // dirty so that ComputeChecksum() picks up the change and computes the
+  // checksum correctly. Validate against a string_view reconstructed over the
+  // same underlying array.
+ mutable_elt.Get() = 'b';
+ ASSERT_THAT(vector->Get(3), IsOkAndHolds(Pointee(Eq('b'))));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc1, vector->ComputeChecksum());
+ Crc32 full_crc1;
+ full_crc1.Append(reconstructed_view);
+ EXPECT_THAT(crc1, Eq(full_crc1));
+
+ // Mutate and test again.
+ mutable_elt.Get() = 'c';
+ ASSERT_THAT(vector->Get(3), IsOkAndHolds(Pointee(Eq('c'))));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc2, vector->ComputeChecksum());
+ Crc32 full_crc2;
+ full_crc2.Append(reconstructed_view);
+ EXPECT_THAT(crc2, Eq(full_crc2));
+}
+
+TEST_F(FileBackedVectorTest, MutableArrayView) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::vector<int>(/*count=*/100, /*value=*/1));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2494890115U)));
+
+ constexpr int kArrayViewOffset = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedVector<int>::MutableArrayView mutable_arr,
+ vector->GetMutable(kArrayViewOffset, /*len=*/3));
+ EXPECT_THAT(mutable_arr, SizeIs(3));
+
+ mutable_arr[0] = 2;
+ mutable_arr[1] = 3;
+ mutable_arr[2] = 4;
+
+ EXPECT_THAT(vector->Get(kArrayViewOffset + 0), IsOkAndHolds(Pointee(Eq(2))));
+ EXPECT_THAT(mutable_arr.data()[0], Eq(2));
+
+ EXPECT_THAT(vector->Get(kArrayViewOffset + 1), IsOkAndHolds(Pointee(Eq(3))));
+ EXPECT_THAT(mutable_arr.data()[1], Eq(3));
+
+ EXPECT_THAT(vector->Get(kArrayViewOffset + 2), IsOkAndHolds(Pointee(Eq(4))));
+ EXPECT_THAT(mutable_arr.data()[2], Eq(4));
+}
+
+TEST_F(FileBackedVectorTest, MutableArrayViewSetArray) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::vector<int>(/*count=*/100, /*value=*/1));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2494890115U)));
+
+ constexpr int kArrayViewOffset = 3;
+ constexpr int kArrayViewLen = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedVector<int>::MutableArrayView mutable_arr,
+ vector->GetMutable(kArrayViewOffset, kArrayViewLen));
+
+ std::vector<int> change1{2, 3, 4};
+ mutable_arr.SetArray(/*idx=*/0, change1.data(), change1.size());
+ EXPECT_THAT(Get(vector.get(), kArrayViewOffset, kArrayViewLen),
+ ElementsAre(2, 3, 4, 1, 1));
+
+ std::vector<int> change2{5, 6};
+ mutable_arr.SetArray(/*idx=*/2, change2.data(), change2.size());
+ EXPECT_THAT(Get(vector.get(), kArrayViewOffset, kArrayViewLen),
+ ElementsAre(2, 3, 5, 6, 1));
+}
+
+TEST_F(FileBackedVectorTest, MutableArrayViewSetArrayWithZeroLength) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::vector<int>(/*count=*/100, /*value=*/1));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2494890115U)));
+
+ constexpr int kArrayViewOffset = 3;
+ constexpr int kArrayViewLen = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedVector<int>::MutableArrayView mutable_arr,
+ vector->GetMutable(kArrayViewOffset, kArrayViewLen));
+
+ // Zero arr_len should work and change nothing
+ std::vector<int> change{2, 3};
+ mutable_arr.SetArray(/*idx=*/0, change.data(), /*arr_len=*/0);
+ EXPECT_THAT(Get(vector.get(), kArrayViewOffset, kArrayViewLen),
+ ElementsAre(1, 1, 1, 1, 1));
+}
+
+TEST_F(FileBackedVectorTest, MutableArrayViewIndexOperatorShouldSetDirty) {
+ // Create an array with some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::vector<int>(/*count=*/100, /*value=*/1));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2494890115U)));
+
+ std::string_view reconstructed_view(
+ reinterpret_cast<const char*>(vector->array()),
+ vector->num_elements() * sizeof(int));
+
+ constexpr int kArrayViewOffset = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedVector<int>::MutableArrayView mutable_arr,
+ vector->GetMutable(kArrayViewOffset, /*len=*/3));
+
+  // Use operator[] to mutate elements.
+  // If non-const operator[] is called, MutableArrayView should mark the
+  // element index dirty so that ComputeChecksum() picks up the change and
+  // computes the checksum correctly. Validate against a string_view
+  // reconstructed over the same underlying array.
+ mutable_arr[0] = 2;
+ ASSERT_THAT(vector->Get(kArrayViewOffset + 0), IsOkAndHolds(Pointee(Eq(2))));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc1, vector->ComputeChecksum());
+ EXPECT_THAT(crc1, Eq(Crc32(reconstructed_view)));
+
+ mutable_arr[1] = 3;
+ ASSERT_THAT(vector->Get(kArrayViewOffset + 1), IsOkAndHolds(Pointee(Eq(3))));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc2, vector->ComputeChecksum());
+ EXPECT_THAT(crc2, Eq(Crc32(reconstructed_view)));
+
+ mutable_arr[2] = 4;
+ ASSERT_THAT(vector->Get(kArrayViewOffset + 2), IsOkAndHolds(Pointee(Eq(4))));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc3, vector->ComputeChecksum());
+ EXPECT_THAT(crc3, Eq(Crc32(reconstructed_view)));
+
+ // Change the same position. It should set dirty again.
+ mutable_arr[0] = 5;
+ ASSERT_THAT(vector->Get(kArrayViewOffset + 0), IsOkAndHolds(Pointee(Eq(5))));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc4, vector->ComputeChecksum());
+ EXPECT_THAT(crc4, Eq(Crc32(reconstructed_view)));
+}
+
+TEST_F(FileBackedVectorTest, MutableArrayViewSetArrayShouldSetDirty) {
+ // Create an array with some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::vector<int>(/*count=*/100, /*value=*/1));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2494890115U)));
+
+ std::string_view reconstructed_view(
+ reinterpret_cast<const char*>(vector->array()),
+ vector->num_elements() * sizeof(int));
+
+ constexpr int kArrayViewOffset = 3;
+ constexpr int kArrayViewLen = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedVector<int>::MutableArrayView mutable_arr,
+ vector->GetMutable(kArrayViewOffset, kArrayViewLen));
+
+ std::vector<int> change{2, 3, 4};
+ mutable_arr.SetArray(/*idx=*/0, change.data(), change.size());
+ ASSERT_THAT(Get(vector.get(), kArrayViewOffset, kArrayViewLen),
+ ElementsAre(2, 3, 4, 1, 1));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc, vector->ComputeChecksum());
+ EXPECT_THAT(crc, Eq(Crc32(reconstructed_view)));
+}
+
+TEST_F(FileBackedVectorTest, Append) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ASSERT_THAT(vector->num_elements(), Eq(0));
+
+ ICING_EXPECT_OK(vector->Append('a'));
+ EXPECT_THAT(vector->num_elements(), Eq(1));
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
+
+ ICING_EXPECT_OK(vector->Append('b'));
+ EXPECT_THAT(vector->num_elements(), Eq(2));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(Eq('b'))));
+}
+
+TEST_F(FileBackedVectorTest, AppendAfterSet) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ASSERT_THAT(vector->num_elements(), Eq(0));
+
+ ICING_ASSERT_OK(vector->Set(9, 'z'));
+ ASSERT_THAT(vector->num_elements(), Eq(10));
+ ICING_EXPECT_OK(vector->Append('a'));
+ EXPECT_THAT(vector->num_elements(), Eq(11));
+ EXPECT_THAT(vector->Get(10), IsOkAndHolds(Pointee(Eq('a'))));
+}
+
+TEST_F(FileBackedVectorTest, AppendAfterTruncate) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::string(1000, 'z'));
+ ASSERT_THAT(vector->num_elements(), Eq(1000));
+
+ ICING_ASSERT_OK(vector->TruncateTo(5));
+ ICING_EXPECT_OK(vector->Append('a'));
+ EXPECT_THAT(vector->num_elements(), Eq(6));
+ EXPECT_THAT(vector->Get(5), IsOkAndHolds(Pointee(Eq('a'))));
+}
+
+TEST_F(FileBackedVectorTest, AppendShouldFailIfExceedingMaxFileSize) {
+ int32_t max_file_size = (1 << 10) - 1;
+ int32_t max_num_elements =
+ (max_file_size - FileBackedVector<char>::Header::kHeaderSize) /
+ sizeof(char);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size));
+ ICING_ASSERT_OK(vector->Set(max_num_elements - 1, 'z'));
+ ASSERT_THAT(vector->num_elements(), Eq(max_num_elements));
+
+ EXPECT_THAT(vector->Append('a'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedVectorTest, Allocate) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ASSERT_THAT(vector->num_elements(), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ typename FileBackedVector<char>::MutableArrayView mutable_arr,
+ vector->Allocate(3));
+ EXPECT_THAT(vector->num_elements(), Eq(3));
+ EXPECT_THAT(mutable_arr, SizeIs(3));
+ std::string change = "abc";
+ mutable_arr.SetArray(/*idx=*/0, /*arr=*/change.data(), /*arr_len=*/3);
+ EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/3), Eq(change));
+}
+
+TEST_F(FileBackedVectorTest, AllocateAfterSet) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ASSERT_THAT(vector->num_elements(), Eq(0));
+
+ ICING_ASSERT_OK(vector->Set(9, 'z'));
+ ASSERT_THAT(vector->num_elements(), Eq(10));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ typename FileBackedVector<char>::MutableArrayView mutable_arr,
+ vector->Allocate(3));
+ EXPECT_THAT(vector->num_elements(), Eq(13));
+ EXPECT_THAT(mutable_arr, SizeIs(3));
+ std::string change = "abc";
+ mutable_arr.SetArray(/*idx=*/0, /*arr=*/change.data(), /*arr_len=*/3);
+ EXPECT_THAT(Get(vector.get(), /*idx=*/10, /*expected_len=*/3), Eq(change));
+}
+
+TEST_F(FileBackedVectorTest, AllocateAfterTruncate) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), /*idx=*/0, std::string(1000, 'z'));
+ ASSERT_THAT(vector->num_elements(), Eq(1000));
+
+ ICING_ASSERT_OK(vector->TruncateTo(5));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ typename FileBackedVector<char>::MutableArrayView mutable_arr,
+ vector->Allocate(3));
+ EXPECT_THAT(vector->num_elements(), Eq(8));
+ std::string change = "abc";
+ mutable_arr.SetArray(/*idx=*/0, /*arr=*/change.data(), /*arr_len=*/3);
+ EXPECT_THAT(Get(vector.get(), /*idx=*/5, /*expected_len=*/3), Eq(change));
+}
+
+TEST_F(FileBackedVectorTest, AllocateInvalidLengthShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ASSERT_THAT(vector->num_elements(), Eq(0));
+
+ EXPECT_THAT(vector->Allocate(-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->num_elements(), Eq(0));
+
+ EXPECT_THAT(vector->Allocate(0),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->num_elements(), Eq(0));
+}
+
+TEST_F(FileBackedVectorTest, AllocateShouldFailIfExceedingMaxFileSize) {
+ int32_t max_file_size = (1 << 10) - 1;
+ int32_t max_num_elements =
+ (max_file_size - FileBackedVector<char>::Header::kHeaderSize) /
+ sizeof(char);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size));
+ ICING_ASSERT_OK(vector->Set(max_num_elements - 3, 'z'));
+ ASSERT_THAT(vector->num_elements(), Eq(max_num_elements - 2));
+
+ EXPECT_THAT(vector->Allocate(3),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Allocate(2), IsOk());
+}
+
TEST_F(FileBackedVectorTest, IncrementalCrc_NonOverlappingChanges) {
int num_elements = 1000;
int incremental_size = 3;
@@ -265,30 +786,58 @@ TEST_F(FileBackedVectorTest, IncrementalCrc_OverlappingChanges) {
}
}
+TEST_F(FileBackedVectorTest, SetIntMaxShouldReturnOutOfRangeError) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int32_t>> vector,
+ FileBackedVector<int32_t>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+  // This is an edge case: since Set() calls GrowIfNecessary(idx + 1), we have
+  // to make sure Set() handles idx == INT32_MAX correctly instead of letting
+  // idx + 1 overflow.
+ EXPECT_THAT(vector->Set(std::numeric_limits<int32_t>::max(), 1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
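+
+// A minimal sketch (not FileBackedVector's real code) of an overflow-safe
+// bound check for the edge case above: compare idx against the limit before
+// ever computing idx + 1, so the addition can never wrap around int32_t.
+[[maybe_unused]] bool IndexInRangeSketch(int32_t idx,
+                                         int32_t max_num_elements) {
+  // Rejects idx == INT32_MAX without ever evaluating idx + 1.
+  return idx >= 0 && idx <= max_num_elements - 1;
+}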
+
TEST_F(FileBackedVectorTest, Grow) {
- // This is the same value as FileBackedVector::kMaxNumElts
- constexpr int32_t kMaxNumElts = 1U << 20;
+ int32_t max_file_size = (1 << 20) - 1;
+ int32_t header_size = FileBackedVector<int32_t>::Header::kHeaderSize;
+ int32_t element_type_size = static_cast<int32_t>(sizeof(int32_t));
+
+  // Max file size includes the size of the header and the elements, so the
+  // max # of elements will be (max_file_size - header_size) /
+  // element_type_size.
+  //
+  // Also ensure that (max_file_size - header_size) is not a multiple of
+  // element_type_size, in order to test that the desired # of elements is
+  // computed with floor division rather than ceiling.
+ ASSERT_THAT((max_file_size - header_size) % element_type_size, Not(Eq(0)));
+ int32_t max_num_elements = (max_file_size - header_size) / element_type_size;
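+  // Worked example, assuming header_size is a multiple of 4: max_file_size =
+  // 2^20 - 1 = 1048575 and element_type_size = 4, so (max_file_size -
+  // header_size) % 4 == 3, and the floor division above drops the 3 leftover
+  // bytes instead of rounding up to a partial element.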
ASSERT_TRUE(filesystem_.Truncate(fd_, 0));
- // Create an array and add some data.
+ // Create a vector and add some data.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<FileBackedVector<char>> vector,
- FileBackedVector<char>::Create(
+ std::unique_ptr<FileBackedVector<int32_t>> vector,
+ FileBackedVector<int32_t>::Create(
filesystem_, file_path_,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size));
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
-
- EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'),
+  // max_num_elements is the allowed max # of elements, so the valid index
+  // range is [0, max_num_elements - 1].
+ EXPECT_THAT(vector->Set(max_num_elements, 1),
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
- EXPECT_THAT(vector->Set(-1, 'a'),
+ EXPECT_THAT(vector->Set(-1, 1),
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(max_num_elements - 1, 1), IsOk());
- uint32_t start = kMaxNumElts - 13;
- Insert(vector.get(), start, "abcde");
+ int32_t start = max_num_elements - 5;
+ std::vector<int32_t> data{1, 2, 3, 4, 5};
+ Insert(vector.get(), start, data);
// Crc works?
- const Crc32 good_crc(1134899064U);
+ const Crc32 good_crc(650981917U);
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));
// PersistToDisk does nothing bad, and ensures the content is still there
@@ -300,12 +849,12 @@ TEST_F(FileBackedVectorTest, Grow) {
vector.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- vector, FileBackedVector<char>::Create(
- filesystem_, file_path_,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ vector,
+ FileBackedVector<int32_t>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size));
- std::string expected = "abcde";
- EXPECT_EQ(expected, Get(vector.get(), start, expected.length()));
+ EXPECT_THAT(Get(vector.get(), start, data.size()), Eq(data));
}
TEST_F(FileBackedVectorTest, GrowsInChunks) {
@@ -318,25 +867,32 @@ TEST_F(FileBackedVectorTest, GrowsInChunks) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- // Our initial file size should just be the size of the header
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(sizeof(FileBackedVector<char>::Header)));
-
- // Once we add something though, we'll grow to kGrowElements big
- Insert(vector.get(), 0, "a");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ // Our initial file size should just be the size of the header. Disk usage
+ // will indicate that one block has been allocated, which contains the header.
+ int header_size = sizeof(FileBackedVector<char>::Header);
+ int page_size = getpagesize();
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size));
+
+  // Once we add something though, we'll grow to be kGrowElements big. From
+  // this point on, file size and disk usage should be the same because
+  // growing will explicitly allocate the number of blocks needed to
+  // accommodate the file.
+ Insert(vector.get(), 0, {1});
+ int file_size = 1 * kGrowElements * sizeof(int);
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Should still be the same size, don't need to grow underlying file
- Insert(vector.get(), 1, "b");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ Insert(vector.get(), 1, {2});
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Now we grow by a kGrowElements chunk, so the underlying file is 2
// kGrowElements big
- Insert(vector.get(), 2, std::string(kGrowElements, 'c'));
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * 2 * sizeof(int)));
+ file_size = 2 * kGrowElements * sizeof(int);
+ Insert(vector.get(), 2, std::vector<int>(kGrowElements, 3));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Destroy/persist the contents.
vector.reset();
@@ -409,10 +965,10 @@ TEST_F(FileBackedVectorTest, TruncateTo) {
EXPECT_EQ(1, vector->num_elements());
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));
- // Truncating doesn't cause the checksum to be updated.
+  // Truncating clears the checksum and resets it to 0.
ICING_EXPECT_OK(vector->TruncateTo(0));
EXPECT_EQ(0, vector->num_elements());
- EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
// Can't truncate past end.
EXPECT_THAT(vector->TruncateTo(100),
@@ -423,6 +979,386 @@ TEST_F(FileBackedVectorTest, TruncateTo) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
+TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<float>> vector,
+ FileBackedVector<float>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ ICING_ASSERT_OK(vector->Set(0, 1.0));
+ ICING_ASSERT_OK(vector->Set(1, 2.0));
+ ICING_ASSERT_OK(vector->Set(2, 2.0));
+ ICING_ASSERT_OK(vector->Set(3, 2.0));
+ ICING_ASSERT_OK(vector->Set(4, 2.0));
+ } // Destroying the vector should trigger a checksum of the 5 elements
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<float>> vector,
+ FileBackedVector<float>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_EQ(5, vector->num_elements());
+ ICING_EXPECT_OK(vector->TruncateTo(4));
+ EXPECT_EQ(4, vector->num_elements());
+ } // Destroying the vector should update the checksum to 4 elements
+
+ // Creating again should double check that our checksum of 4 elements matches
+ // what was previously saved.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<float>> vector,
+ FileBackedVector<float>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_EQ(vector->num_elements(), 4);
+ }
+}
+
+TEST_F(FileBackedVectorTest, Sort) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+
+ // Sort vector range [1, 4) (excluding 4).
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
+  // Verify that the sorted range is sorted and the other elements remain
+  // unchanged.
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(5)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(1)));
+
+  // Sort again, this time with end_idx = num_elements().
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/0, /*end_idx=*/vector->num_elements()),
+ IsOk());
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(1)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(5)));
+}
+
+TEST_F(FileBackedVectorTest, SortByInvalidIndexShouldReturnOutOfRangeError) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/-1, /*end_idx=*/4),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/0, /*end_idx=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/3),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/5, /*end_idx=*/5),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/3, /*end_idx=*/6),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedVectorTest, SortShouldSetDirtyCorrectly) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK(vector->Set(0, 5));
+ ICING_ASSERT_OK(vector->Set(1, 4));
+ ICING_ASSERT_OK(vector->Set(2, 2));
+ ICING_ASSERT_OK(vector->Set(3, 3));
+ ICING_ASSERT_OK(vector->Set(4, 1));
+ } // Destroying the vector should trigger a checksum of the 5 elements
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Sort vector range [1, 4) (excluding 4).
+ EXPECT_THAT(vector->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
+ } // Destroying the vector should update the checksum
+
+ // Creating again should check that the checksum after sorting matches what
+ // was previously saved. This tests the correctness of SetDirty() for sorted
+ // elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  // Verify that the sorted range is sorted and the other elements remain
+  // unchanged.
+ EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(5)));
+ EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(2)));
+ EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(3)));
+ EXPECT_THAT(vector->Get(3), IsOkAndHolds(Pointee(4)));
+ EXPECT_THAT(vector->Get(4), IsOkAndHolds(Pointee(1)));
+}
+
+TEST_F(FileBackedVectorTest, SetDirty) {
+ // 1. Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "abcd");
+
+ std::string_view reconstructed_view =
+ std::string_view(vector->array(), vector->num_elements());
+
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc1, vector->ComputeChecksum());
+ Crc32 full_crc_before_overwrite;
+ full_crc_before_overwrite.Append(reconstructed_view);
+ EXPECT_THAT(crc1, Eq(full_crc_before_overwrite));
+
+ // 2. Manually overwrite the values of the first two elements.
+ std::string corrupted_content = "ef";
+ ASSERT_THAT(
+ filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
+ corrupted_content.c_str(), corrupted_content.length()),
+ IsTrue());
+ ASSERT_THAT(Get(vector.get(), 0, 4), Eq("efcd"));
+ Crc32 full_crc_after_overwrite;
+ full_crc_after_overwrite.Append(reconstructed_view);
+ ASSERT_THAT(full_crc_before_overwrite, Not(Eq(full_crc_after_overwrite)));
+
+ // 3. Without calling SetDirty(), the checksum will be recomputed incorrectly.
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc2, vector->ComputeChecksum());
+ EXPECT_THAT(crc2, Not(Eq(full_crc_after_overwrite)));
+
+ // 4. Call SetDirty()
+ vector->SetDirty(0);
+ vector->SetDirty(1);
+
+  // 5. The checksum should be computed correctly after calling SetDirty()
+  //    with the correct indices.
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc3, vector->ComputeChecksum());
+ EXPECT_THAT(crc3, Eq(full_crc_after_overwrite));
+}
+
+TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Shrink the file to be smaller than the header.
+ filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1);
+
+ {
+ // 3. Attempt to create the file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a different element size and confirm
+ // that it fails.
+ EXPECT_THAT(FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header, but don't update the checksum. This would be similar
+ // to corruption of the header.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ header.num_elements = 1;
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with a header that doesn't match its
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header so that the number of elements exceeds the actual size
+ // of the underlying file.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ int64_t file_size = filesystem_.GetFileSize(fd_);
+ int64_t allocated_elements_size = file_size - sizeof(header);
+ header.num_elements = (allocated_elements_size / sizeof(char)) + 1;
+ header.header_checksum = header.CalculateHeaderChecksum();
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with num_elements that is larger than the
+ // underlying file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptElementsFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Overwrite the values of the first two elements.
+ std::string corrupted_content = "BY";
+ ASSERT_THAT(
+ filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
+ corrupted_content.c_str(), corrupted_content.length()),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with elements that don't match their
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a completely valid header and elements
+ // region. This should succeed.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ IsOk());
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitFromExistingFileShouldPreMapAtLeastFileSize) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize));
+ Insert(vector.get(), 10000, "A");
+ Insert(vector.get(), 10001, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with pre_mapping_mmap_size < file_size. It
+ // should still pre-map file_size, so we can pass the checksum
+ // verification when initializing and get the correct contents.
+ int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
+ int pre_mapping_mmap_size = 10;
+ ASSERT_THAT(pre_mapping_mmap_size, Lt(file_size));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize, pre_mapping_mmap_size));
+ EXPECT_THAT(Get(vector.get(), /*idx=*/10000, /*expected_len=*/2), Eq("AZ"));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc
index 4a76c01..cd905e7 100644
--- a/icing/file/filesystem.cc
+++ b/icing/file/filesystem.cc
@@ -16,7 +16,6 @@
#include <dirent.h>
#include <dlfcn.h>
-#include <errno.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <pthread.h>
@@ -26,6 +25,7 @@
#include <unistd.h>
#include <algorithm>
+#include <cerrno>
#include <cstdint>
#include <unordered_set>
@@ -63,18 +63,16 @@ void LogOpenFileDescriptors() {
constexpr int kMaxFileDescriptorsToStat = 4096;
struct rlimit rlim = {0, 0};
if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "getrlimit() failed (errno=%d)", errno);
+ ICING_LOG(ERROR) << "getrlimit() failed (errno=" << errno << ")";
return;
}
int fd_lim = rlim.rlim_cur;
if (fd_lim > kMaxFileDescriptorsToStat) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Maximum number of file descriptors (%d) too large.", fd_lim);
+ ICING_LOG(ERROR) << "Maximum number of file descriptors (" << fd_lim
+ << ") too large.";
fd_lim = kMaxFileDescriptorsToStat;
}
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Listing up to %d file descriptors.", fd_lim);
+ ICING_LOG(INFO) << "Listing up to " << fd_lim << " file descriptors.";
// Verify that /proc/self/fd is a directory. If not, procfs is not mounted or
// inaccessible for some other reason. In that case, there's no point trying
@@ -96,15 +94,12 @@ void LogOpenFileDescriptors() {
if (len >= 0) {
// Zero-terminate the buffer, because readlink() won't.
target[len < target_size ? len : target_size - 1] = '\0';
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> \"%s\"", fd,
- target);
+ ICING_LOG(INFO) << "fd " << fd << " -> \"" << target << "\"";
} else if (errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> ? (errno=%d)",
- fd, errno);
+ ICING_LOG(ERROR) << "fd " << fd << " -> ? (errno=" << errno << ")";
}
}
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "File descriptor list complete.");
+ ICING_LOG(INFO) << "File descriptor list complete.";
}
// Logs an error formatted as: desc1 + file_name + desc2 + strerror(errnum).
@@ -113,8 +108,11 @@ void LogOpenFileDescriptors() {
// file descriptors (see LogOpenFileDescriptors() above).
void LogOpenError(const char* desc1, const char* file_name, const char* desc2,
int errnum) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "%s%s%s%s", desc1, file_name, desc2, strerror(errnum));
+ if (errnum == ENOENT) {
+ ICING_VLOG(1) << desc1 << file_name << desc2 << strerror(errnum);
+ } else {
+ ICING_LOG(ERROR) << desc1 << file_name << desc2 << strerror(errnum);
+ }
if (errnum == EMFILE) {
LogOpenFileDescriptors();
}
@@ -133,6 +131,9 @@ bool ListDirectoryInternal(const char* dir_name,
return false;
}
+  // According to the Linux man page
+  // (https://man7.org/linux/man-pages/man3/readdir.3.html#RETURN_VALUE), the
+  // returned dirent may be statically allocated, so don't free it.
dirent* p;
// readdir's implementation seems to be thread safe.
while ((p = readdir(dir)) != nullptr) {
@@ -155,8 +156,7 @@ bool ListDirectoryInternal(const char* dir_name,
}
}
if (closedir(dir) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Error closing %s: %s", dir_name, strerror(errno));
+    ICING_LOG(ERROR) << "Error closing " << dir_name << ": " << strerror(errno);
}
return true;
}
@@ -179,11 +179,10 @@ void ScopedFd::reset(int fd) {
const int64_t Filesystem::kBadFileSize;
bool Filesystem::DeleteFile(const char* file_name) const {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf("Deleting file %s", file_name);
+ ICING_VLOG(1) << "Deleting file " << file_name;
int ret = unlink(file_name);
if (ret != 0 && errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Deleting file %s failed: %s", file_name, strerror(errno));
+    ICING_LOG(ERROR) << "Deleting file " << file_name
+                     << " failed: " << strerror(errno);
return false;
}
return true;
@@ -192,8 +191,7 @@ bool Filesystem::DeleteFile(const char* file_name) const {
bool Filesystem::DeleteDirectory(const char* dir_name) const {
int ret = rmdir(dir_name);
if (ret != 0 && errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Deleting directory %s failed: %s", dir_name, strerror(errno));
+    ICING_LOG(ERROR) << "Deleting directory " << dir_name
+                     << " failed: " << strerror(errno);
return false;
}
return true;
@@ -206,8 +204,7 @@ bool Filesystem::DeleteDirectoryRecursively(const char* dir_name) const {
if (errno == ENOENT) {
return true; // If directory didn't exist, this was successful.
}
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Stat %s failed: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Stat " << dir_name << " failed: " << strerror(errno);
return false;
}
vector<std::string> entries;
@@ -220,8 +217,7 @@ bool Filesystem::DeleteDirectoryRecursively(const char* dir_name) const {
++i) {
std::string filename = std::string(dir_name) + '/' + *i;
if (stat(filename.c_str(), &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Stat %s failed: %s", filename.c_str(), strerror(errno));
+ ICING_LOG(ERROR) << "Stat " << filename << " failed: " << strerror(errno);
success = false;
} else if (S_ISDIR(st.st_mode)) {
success = DeleteDirectoryRecursively(filename.c_str()) && success;
@@ -244,8 +240,7 @@ bool Filesystem::FileExists(const char* file_name) const {
exists = S_ISREG(st.st_mode) != 0;
} else {
if (errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to stat file %s: %s", file_name, strerror(errno));
+      ICING_LOG(ERROR) << "Unable to stat file " << file_name << ": "
+                       << strerror(errno);
}
exists = false;
}
@@ -259,8 +254,7 @@ bool Filesystem::DirectoryExists(const char* dir_name) const {
exists = S_ISDIR(st.st_mode) != 0;
} else {
if (errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to stat directory %s: %s", dir_name, strerror(errno));
+      ICING_LOG(ERROR) << "Unable to stat directory " << dir_name << ": "
+                       << strerror(errno);
}
exists = false;
}
@@ -316,8 +310,7 @@ bool Filesystem::GetMatchingFiles(const char* glob,
int basename_idx = GetBasenameIndex(glob);
if (basename_idx == 0) {
// We need a directory.
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Expected directory, no matching files for: %s", glob);
+ ICING_VLOG(1) << "Expected directory, no matching files for: " << glob;
return true;
}
const char* basename_glob = glob + basename_idx;
@@ -372,8 +365,11 @@ int Filesystem::OpenForRead(const char* file_name) const {
int64_t Filesystem::GetFileSize(int fd) const {
struct stat st;
if (fstat(fd, &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
- strerror(errno));
+ if (errno == ENOENT) {
+ ICING_VLOG(1) << "Unable to stat file: " << strerror(errno);
+ } else {
+ ICING_LOG(WARNING) << "Unable to stat file: " << strerror(errno);
+ }
return kBadFileSize;
}
return st.st_size;
@@ -383,11 +379,9 @@ int64_t Filesystem::GetFileSize(const char* filename) const {
struct stat st;
if (stat(filename, &st) < 0) {
if (errno == ENOENT) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Unable to stat file %s: %s", filename, strerror(errno));
+      ICING_VLOG(1) << "Unable to stat file " << filename << ": "
+                    << strerror(errno);
} else {
- ICING_LOG(WARNING) << IcingStringUtil::StringPrintf(
- "Unable to stat file %s: %s", filename, strerror(errno));
+      ICING_LOG(WARNING) << "Unable to stat file " << filename << ": "
+                         << strerror(errno);
}
return kBadFileSize;
}
@@ -396,8 +390,7 @@ int64_t Filesystem::GetFileSize(const char* filename) const {
bool Filesystem::Truncate(int fd, int64_t new_size) const {
if (ftruncate(fd, new_size) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to truncate file: %s", strerror(errno));
+ ICING_LOG(ERROR) << "Unable to truncate file: " << strerror(errno);
return false;
}
lseek(fd, new_size, SEEK_SET);
@@ -416,8 +409,7 @@ bool Filesystem::Truncate(const char* filename, int64_t new_size) const {
bool Filesystem::Grow(int fd, int64_t new_size) const {
if (ftruncate(fd, new_size) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to grow file: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to grow file: " << strerror(errno);
return false;
}
@@ -442,8 +434,7 @@ bool Filesystem::Write(int fd, const void* data, size_t data_size) const {
size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
ssize_t wrote = write(fd, data, chunk_size);
if (wrote < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Bad write: " << strerror(errno);
return false;
}
data = static_cast<const uint8_t*>(data) + wrote;
@@ -464,6 +455,68 @@ bool Filesystem::Write(const char* filename, const void* data,
return success;
}
+bool Filesystem::CopyFile(const char* src, const char* dst) const {
+ ScopedFd src_fd(OpenForRead(src));
+
+ std::string dir = GetDirname(dst);
+ if (!CreateDirectoryRecursively(dir.c_str())) {
+ return false;
+ }
+ ScopedFd dst_fd(OpenForWrite(dst));
+
+ if (!src_fd.is_valid() || !dst_fd.is_valid()) {
+ return false;
+ }
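+  // Copy by reading the entire source file into one in-memory buffer and
+  // writing it back out in a single pass.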
+ uint64_t size = GetFileSize(*src_fd);
+ std::unique_ptr<uint8_t[]> buf = std::make_unique<uint8_t[]>(size);
+ if (!Read(*src_fd, buf.get(), size)) {
+ return false;
+ }
+ return Write(*dst_fd, buf.get(), size);
+}
+
+bool Filesystem::CopyDirectory(const char* src_dir, const char* dst_dir,
+ bool recursive) const {
+ DIR* dir = opendir(src_dir);
+ if (!dir) {
+ LogOpenError("Unable to open directory ", src_dir, ": ", errno);
+ return false;
+ }
+
+ dirent* p;
+ // readdir's implementation seems to be thread safe.
+ while ((p = readdir(dir)) != nullptr) {
+ std::string file_name(p->d_name);
+ if (file_name == "." || file_name == "..") {
+ continue;
+ }
+
+ std::string full_src_path = absl_ports::StrCat(src_dir, "/", p->d_name);
+ std::string full_dst_path = absl_ports::StrCat(dst_dir, "/", p->d_name);
+
+    // CopyFile() creates any missing parent directories on the destination
+    // path, so copying a regular file implicitly creates its directory; no
+    // explicit directory copy is needed here.
+ if (p->d_type != DT_DIR) {
+ if (!CopyFile(full_src_path.c_str(), full_dst_path.c_str())) {
+ return false;
+ }
+ }
+
+ // Recurse down directories, if requested.
+ if (recursive && (p->d_type == DT_DIR)) {
+ std::string src_sub_dir = absl_ports::StrCat(src_dir, "/", p->d_name);
+ std::string dst_sub_dir = absl_ports::StrCat(dst_dir, "/", p->d_name);
+ if (!CopyDirectory(src_sub_dir.c_str(), dst_sub_dir.c_str(), recursive)) {
+ return false;
+ }
+ }
+ }
+ if (closedir(dir) != 0) {
+ ICING_LOG(ERROR) << "Error closing " << src_dir << ": " << strerror(errno);
+ }
+ return true;
+}
+
bool Filesystem::PWrite(int fd, off_t offset, const void* data,
size_t data_size) const {
size_t write_len = data_size;
@@ -472,8 +525,7 @@ bool Filesystem::PWrite(int fd, off_t offset, const void* data,
size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
ssize_t wrote = pwrite(fd, data, chunk_size, offset);
if (wrote < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Bad write: " << strerror(errno);
return false;
}
data = static_cast<const uint8_t*>(data) + wrote;
@@ -498,8 +550,7 @@ bool Filesystem::PWrite(const char* filename, off_t offset, const void* data,
bool Filesystem::Read(int fd, void* buf, size_t buf_size) const {
ssize_t read_status = read(fd, buf, buf_size);
if (read_status < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad read: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Bad read: " << strerror(errno);
return false;
}
return true;
@@ -519,8 +570,7 @@ bool Filesystem::Read(const char* filename, void* buf, size_t buf_size) const {
bool Filesystem::PRead(int fd, void* buf, size_t buf_size, off_t offset) const {
ssize_t read_status = pread(fd, buf, buf_size, offset);
if (read_status < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad read: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Bad read: " << strerror(errno);
return false;
}
return true;
@@ -546,8 +596,7 @@ bool Filesystem::DataSync(int fd) const {
#endif
if (result < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to sync data: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to sync data: " << strerror(errno);
return false;
}
return true;
@@ -555,9 +604,7 @@ bool Filesystem::DataSync(int fd) const {
bool Filesystem::RenameFile(const char* old_name, const char* new_name) const {
if (rename(old_name, new_name) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to rename file %s to %s: %s", old_name, new_name,
- strerror(errno));
+    ICING_LOG(ERROR) << "Unable to rename file " << old_name << " to "
+                     << new_name << ": " << strerror(errno);
return false;
}
return true;
@@ -595,8 +642,7 @@ bool Filesystem::CreateDirectory(const char* dir_name) const {
if (mkdir(dir_name, S_IRUSR | S_IWUSR | S_IXUSR) == 0) {
success = true;
} else {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Creating directory %s failed: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Creating directory " << dir_name << " failed: " << strerror(errno);
}
}
return success;
@@ -616,8 +662,7 @@ bool Filesystem::CreateDirectoryRecursively(const char* dir_name) const {
int64_t Filesystem::GetDiskUsage(int fd) const {
struct stat st;
if (fstat(fd, &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat file: " << strerror(errno);
return kBadFileSize;
}
return st.st_blocks * kStatBlockSize;
@@ -626,8 +671,7 @@ int64_t Filesystem::GetDiskUsage(int fd) const {
int64_t Filesystem::GetFileDiskUsage(const char* path) const {
struct stat st;
if (stat(path, &st) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
- path, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat " << path << ": " << strerror(errno);
return kBadFileSize;
}
return st.st_blocks * kStatBlockSize;
@@ -636,8 +680,7 @@ int64_t Filesystem::GetFileDiskUsage(const char* path) const {
int64_t Filesystem::GetDiskUsage(const char* path) const {
struct stat st;
if (stat(path, &st) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
- path, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat " << path << ": " << strerror(errno);
return kBadFileSize;
}
int64_t result = st.st_blocks * kStatBlockSize;
diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h
index b85f3a0..dd2c5d1 100644
--- a/icing/file/filesystem.h
+++ b/icing/file/filesystem.h
@@ -17,11 +17,9 @@
#ifndef ICING_FILE_FILESYSTEM_H_
#define ICING_FILE_FILESYSTEM_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
#include <cstdint>
+#include <cstdio>
+#include <cstring>
#include <memory>
#include <string>
#include <unordered_set>
@@ -83,8 +81,15 @@ class Filesystem {
// success or if the directory did not yet exist.
virtual bool DeleteDirectoryRecursively(const char* dir_name) const;
+ // Copies the src file to the dst file.
+ virtual bool CopyFile(const char* src, const char* dst) const;
+
+ // Copies the src directory and its contents to the dst dir.
+ virtual bool CopyDirectory(const char* src_dir, const char* dst_dir,
+ bool recursive) const;
+
// Returns true if a file exists. False if the file doesn't exist.
- // If there is an error getting stat on the file, it logs the error and //
+ // If there is an error getting stat on the file, it logs the error and
// asserts.
virtual bool FileExists(const char* file_name) const;
@@ -228,6 +233,11 @@ class Filesystem {
// Increments to_increment by size if size is valid, or sets to_increment
// to kBadFileSize if either size or to_increment is kBadFileSize.
static void IncrementByOrSetInvalid(int64_t size, int64_t* to_increment);
+
+  // Returns -1 if file_size is invalid (i.e. kBadFileSize); otherwise
+  // returns file_size unchanged.
+ static int64_t SanitizeFileSize(int64_t file_size) {
+ return (file_size != kBadFileSize) ? file_size : -1;
+ }
};
// LINT.ThenChange(//depot/google3/icing/file/mock-filesystem.h)
diff --git a/icing/file/filesystem_test.cc b/icing/file/filesystem_test.cc
index 492a50d..214180e 100644
--- a/icing/file/filesystem_test.cc
+++ b/icing/file/filesystem_test.cc
@@ -38,6 +38,7 @@ using ::testing::Gt;
using ::testing::Le;
using ::testing::Ne;
using ::testing::UnorderedElementsAre;
+using ::testing::UnorderedElementsAreArray;
namespace icing {
namespace lib {
@@ -450,5 +451,47 @@ TEST_F(FilesystemTest, ReadWrite) {
EXPECT_THAT(hello, Eq("hello"));
}
+TEST_F(FilesystemTest, CopyDirectory) {
+ Filesystem filesystem;
+
+ // File structure:
+ // <temp_dir>/
+ // src_dir/
+ // file1
+ // file2
+ // sub_dir/
+ // file3
+ const std::string src_dir = temp_dir_ + "/src_dir";
+ const std::string sub_dir = "sub_dir";
+ const std::string sub_dir_path = src_dir + "/" + sub_dir;
+ vector<std::string> some_files = {"file1", "file2", sub_dir + "/file3"};
+
+ // Make sure there is no pre-existing test-dir structure
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(src_dir.c_str()));
+
+ // Setup a test-dir structure
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(
+ sub_dir_path.c_str())); // deepest path for test
+ CreateTestFiles(some_files, src_dir);
+
+ const std::string dst_dir = temp_dir_ + "/dst_dir";
+ EXPECT_TRUE(filesystem.CopyDirectory(src_dir.c_str(), dst_dir.c_str(),
+ /*recursive=*/true));
+
+ vector<std::string> src_dir_files;
+ EXPECT_TRUE(filesystem.ListDirectory(src_dir.c_str(), /*exclude=*/{},
+ /*recursive=*/true, &src_dir_files));
+
+ vector<std::string> dst_dir_files;
+ EXPECT_TRUE(filesystem.ListDirectory(dst_dir.c_str(), /*exclude=*/{},
+ /*recursive=*/true, &dst_dir_files));
+
+ EXPECT_THAT(dst_dir_files, UnorderedElementsAreArray(src_dir_files));
+
+ // Clean up
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(src_dir.c_str()));
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(dst_dir.c_str()));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/file/memory-mapped-file-leak_test.cc b/icing/file/memory-mapped-file-leak_test.cc
deleted file mode 100644
index 598fb61..0000000
--- a/icing/file/memory-mapped-file-leak_test.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/file/memory-mapped-file.h"
-
-#include "perftools/profiles/collector/heap/alloc_recorder.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "icing/file/filesystem.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/testing/recorder-test-utils.h"
-#include "icing/testing/tmp-directory.h"
-
-namespace icing {
-namespace lib {
-namespace {
-
-namespace heap_profile = ::perftools::profiles::collector::heap;
-
-using testing::Le;
-
-TEST(MemoryMappedFileTest, MMapMemoryLeak) {
- std::string test_dir = GetTestTempDir();
- std::string recorder_dir = test_dir + "/recorder";
- Filesystem filesystem;
- ASSERT_TRUE(filesystem.CreateDirectoryRecursively(recorder_dir.c_str()));
-
- ASSERT_TRUE(heap_profile::AllocRecorderStartWithMmapTracking(recorder_dir));
- {
- std::string mmfile_dir = test_dir + "/file";
- ASSERT_TRUE(filesystem.CreateDirectoryRecursively(mmfile_dir.c_str()));
- MemoryMappedFile mmfile(filesystem, mmfile_dir + "/mmfile",
- MemoryMappedFile::READ_WRITE_AUTO_SYNC);
- // How this works:
- // We request a 500-byte mapping starting at the 101st byte of the file.
- // But(!), mmap only accepts offsets that are multiples of page size. So
- // instead mmfile will create a 600-byte mapping starting at the 1st byte of
- // file and then return the address of the 101st byte within that mapping.
- // For this reason, total bytes and peak bytes will be 600 bytes.
- //
- // When mmfile goes out of scope it needs to munmap the mapping that it
- // created. But, remember that the mapping is larger (600 bytes) than what
- // we requested (500 bytes)! So mmfile needs to remember the actual size of
- // the mapping, NOT the requested size. Calling munmap with the correct size
- // will ensure that total_inuse_bytes is 0 after mmfile goes out of scope.
- // Calling munmap with the requested size would still keep 100 bytes of the
- // mapping around!
- mmfile.Remap(100, 500);
- }
- heap_profile::AllocRecorderStop();
-
- // Mmap only affects bytes measurements.
- ProfileInfo profile_info = SummarizeProfileProto(recorder_dir + ".0.pb.gz");
- EXPECT_THAT(profile_info.total_alloc_bytes, Le(600));
- EXPECT_THAT(profile_info.peak_bytes, Le(600));
- EXPECT_THAT(profile_info.inuse_bytes, Le(0));
-}
-
-} // namespace
-} // namespace lib
-} // namespace icing
diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc
index 34365a9..43ed030 100644
--- a/icing/file/memory-mapped-file.cc
+++ b/icing/file/memory-mapped-file.cc
@@ -12,108 +12,156 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// TODO(cassiewang) Add unit-tests to this class.
-
#include "icing/file/memory-mapped-file.h"
#include <sys/mman.h>
#include <cerrno>
+#include <cinttypes>
+#include <memory>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/math-util.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+/* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
+MemoryMappedFile::Create(const Filesystem& filesystem,
+ std::string_view file_path, Strategy mmap_strategy,
+ int64_t max_file_size) {
+ if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid max file size %" PRId64 " for MemoryMappedFile",
+ max_file_size));
+ }
+
+ const std::string file_path_str(file_path);
+ int64_t file_size = filesystem.FileExists(file_path_str.c_str())
+ ? filesystem.GetFileSize(file_path_str.c_str())
+ : 0;
+ if (file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Bad file size for file ", file_path));
+ }
+
+ return MemoryMappedFile(filesystem, file_path, mmap_strategy, max_file_size,
+ file_size);
+}
+
+/* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
+MemoryMappedFile::Create(const Filesystem& filesystem,
+ std::string_view file_path, Strategy mmap_strategy,
+ int64_t max_file_size, int64_t pre_mapping_file_offset,
+ int64_t pre_mapping_mmap_size) {
+ if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid max file size %" PRId64 " for MemoryMappedFile",
+ max_file_size));
+ }
+
+  // We need at least pre_mapping_file_offset + pre_mapping_mmap_size bytes for
+  // the underlying file size, so max_file_size should be at least
+  // pre_mapping_file_offset + pre_mapping_mmap_size. The comparison is
+  // rearranged as offset > max - size to avoid signed integer overflow.
+ if (pre_mapping_file_offset < 0 || pre_mapping_mmap_size < 0 ||
+ pre_mapping_file_offset > max_file_size - pre_mapping_mmap_size) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid pre-mapping file offset %" PRId64 " and mmap size %" PRId64
+ " with max file size %" PRId64 "for MemoryMappedFile",
+ pre_mapping_file_offset, pre_mapping_mmap_size, max_file_size));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ Create(filesystem, file_path, mmap_strategy, max_file_size));
+
+ if (pre_mapping_mmap_size > 0) {
+ ICING_RETURN_IF_ERROR(
+ mmapped_file.RemapImpl(pre_mapping_file_offset, pre_mapping_mmap_size));
+ }
+
+ return std::move(mmapped_file);
+}
+
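The rearranged comparison above (offset > max - size instead of offset + size > max) is the standard way to bounds-check without risking signed overflow, which is undefined behavior in C++. A minimal standalone sketch of the same check, with hypothetical names:

#include <cassert>
#include <cstdint>
#include <limits>

// Returns true iff offset and size are non-negative and offset + size <= max.
// Assuming max >= 0, computing max - size cannot overflow once size >= 0,
// whereas offset + size could wrap around for large inputs.
bool FitsInRange(int64_t offset, int64_t size, int64_t max) {
  return offset >= 0 && size >= 0 && offset <= max - size;
}

int main() {
  assert(FitsInRange(99, 1, 100));
  assert(!FitsInRange(99, 2, 100));
  // A naive offset + size > max check would overflow here.
  assert(!FitsInRange(99, std::numeric_limits<int64_t>::max(), 100));
  return 0;
}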
MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem,
- const std::string_view file_path,
- Strategy mmap_strategy)
+ std::string_view file_path,
+ Strategy mmap_strategy,
+ int64_t max_file_size, int64_t file_size)
: filesystem_(&filesystem),
file_path_(file_path),
- strategy_(mmap_strategy) {}
+ strategy_(mmap_strategy),
+ max_file_size_(max_file_size),
+ file_size_(file_size),
+ mmap_result_(nullptr),
+ file_offset_(0),
+ mmap_size_(0),
+ alignment_adjustment_(0) {}
+
+MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other)
+  // Make sure that mmap_result_ is a nullptr before we call Swap. We don't
+  // care what values the remaining members hold before we swap them into
+  // other, but if mmap_result_ held a garbage non-null value, then other
+  // would try to unmap memory at that address when it is destroyed!
+ : mmap_result_(nullptr) {
+ Swap(&other);
+}
+
+MemoryMappedFile& MemoryMappedFile::operator=(MemoryMappedFile&& other) {
+  // Swap all of our members with other's. This ensures both that this now
+  // holds other's previous resources and that our previous resources are
+  // properly freed when other is destroyed at the end of this function.
+ Swap(&other);
+ return *this;
+}
MemoryMappedFile::~MemoryMappedFile() { Unmap(); }
void MemoryMappedFile::Unmap() {
if (mmap_result_ != nullptr) {
- munmap(mmap_result_, adjusted_mmap_size_);
+ munmap(mmap_result_, adjusted_mmap_size());
mmap_result_ = nullptr;
}
file_offset_ = 0;
- region_ = nullptr;
- region_size_ = 0;
- adjusted_mmap_size_ = 0;
+ mmap_size_ = 0;
+ alignment_adjustment_ = 0;
}
-libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset,
- size_t mmap_size) {
- // First unmap any previously mmapped region.
- Unmap();
+libtextclassifier3::Status MemoryMappedFile::Remap(int64_t file_offset,
+ int64_t mmap_size) {
+ return RemapImpl(file_offset, mmap_size);
+}
- if (mmap_size == 0) {
- // Nothing more to do.
- return libtextclassifier3::Status::OK;
+libtextclassifier3::Status MemoryMappedFile::GrowAndRemapIfNecessary(
+ int64_t new_file_offset, int64_t new_mmap_size) {
+  // We need at least new_file_offset + new_mmap_size bytes for the underlying
+  // file size, and it should not exceed max_file_size_. The comparison is
+  // rearranged as offset > max - size to avoid signed integer overflow.
+ if (new_file_offset < 0 || new_mmap_size < 0 ||
+ new_file_offset > max_file_size_ - new_mmap_size) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid new file offset %" PRId64 " and new mmap size %" PRId64
+ " with max file size %" PRId64 "for MemoryMappedFile",
+ new_file_offset, new_mmap_size, max_file_size_));
}
- size_t aligned_offset =
- math_util::RoundDownTo(file_offset, system_page_size());
- size_t alignment_adjustment = file_offset - aligned_offset;
- size_t adjusted_mmap_size = alignment_adjustment + mmap_size;
-
- int mmap_flags = 0;
- // Determines if the mapped region should just be readable or also writable.
- int protection_flags = 0;
- ScopedFd fd;
- switch (strategy_) {
- case Strategy::READ_ONLY: {
- mmap_flags = MAP_PRIVATE;
- protection_flags = PROT_READ;
- fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
- break;
- }
- case Strategy::READ_WRITE_AUTO_SYNC: {
- mmap_flags = MAP_SHARED;
- protection_flags = PROT_READ | PROT_WRITE;
- fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
- break;
- }
- case Strategy::READ_WRITE_MANUAL_SYNC: {
- mmap_flags = MAP_PRIVATE;
- protection_flags = PROT_READ | PROT_WRITE;
- // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
- // figure out if we can open this file in read-only mode.
- fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
- break;
- }
- default:
- return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
- "Invalid value in switch statement: %d", strategy_));
- }
-
- if (!fd.is_valid()) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Unable to open file meant to be mmapped: ", file_path_));
+ if (new_mmap_size == 0) {
+ // Unmap any previously mmapped region.
+ Unmap();
+ return libtextclassifier3::Status::OK;
}
- mmap_result_ = mmap(nullptr, adjusted_mmap_size, protection_flags, mmap_flags,
- fd.get(), aligned_offset);
+ ICING_RETURN_IF_ERROR(GrowFileSize(new_file_offset + new_mmap_size));
- if (mmap_result_ == MAP_FAILED) {
- mmap_result_ = nullptr;
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to mmap region due to error: ", strerror(errno)));
+ if (new_file_offset != file_offset_ || new_mmap_size > mmap_size_) {
+ ICING_RETURN_IF_ERROR(RemapImpl(new_file_offset, new_mmap_size));
}
- file_offset_ = file_offset;
- region_ = reinterpret_cast<char*>(mmap_result_) + alignment_adjustment;
- region_size_ = mmap_size;
- adjusted_mmap_size_ = adjusted_mmap_size;
return libtextclassifier3::Status::OK;
}
@@ -123,13 +171,27 @@ libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
"Attempting to PersistToDisk on a read-only file: ", file_path_));
}
- if (region_ == nullptr) {
+ if (mmap_result_ == nullptr) {
// Nothing mapped to sync.
return libtextclassifier3::Status::OK;
}
+ // Sync actual file size via system call.
+ int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (actual_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError("Unable to retrieve file size");
+ }
+ file_size_ = actual_file_size;
+
if (strategy_ == Strategy::READ_WRITE_AUTO_SYNC &&
- msync(mmap_result_, adjusted_mmap_size_, MS_SYNC) != 0) {
+      // adjusted_mmap_size(), which is the mmap size after alignment
+      // adjustment, may be larger than the actual underlying file size since we
+      // can pre-mmap a large memory region before growing the file. Therefore,
+      // we sync std::min(file_size_ - adjusted_offset(), adjusted_mmap_size())
+      // bytes.
+ msync(mmap_result_,
+ std::min(file_size_ - adjusted_offset(), adjusted_mmap_size()),
+ MS_SYNC) != 0) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to sync file using msync(): ", file_path_));
}
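To make the min() above concrete, here is a small numeric check under assumed values (page size 4096, a 6000-byte file, and a mapping requested at offset 5000 that was pre-mapped larger than the file):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int64_t page_size = 4096;    // assumed system page size
  const int64_t file_size = 6000;    // actual bytes in the file
  const int64_t file_offset = 5000;  // requested mapping offset
  const int64_t mmap_size = 8192;    // pre-mapped region size

  const int64_t adjusted_offset = (file_offset / page_size) * page_size;
  const int64_t alignment_adjustment = file_offset - adjusted_offset;
  const int64_t adjusted_mmap_size = alignment_adjustment + mmap_size;

  // adjusted_mmap_size (9096) runs past end-of-file, so the msync length is
  // capped at the bytes that actually exist past the aligned offset.
  const int64_t msync_len =
      std::min(file_size - adjusted_offset, adjusted_mmap_size);
  assert(adjusted_offset == 4096 && alignment_adjustment == 904);
  assert(msync_len == 1904);
  return 0;
}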
@@ -139,7 +201,13 @@ libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
// can't be synced using msync(). So, we have to directly write to the
// underlying file to update it.
if (strategy_ == Strategy::READ_WRITE_MANUAL_SYNC &&
- !filesystem_->PWrite(file_path_.c_str(), 0, region(), region_size())) {
+      // Contents before file_offset_ won't be modified by the caller, so we
+      // only need to PWrite contents starting at file_offset_. mmap_size_ may
+      // be larger than the actual underlying file size since we can pre-mmap a
+      // large memory region before growing the file. Therefore, we write
+      // std::min(mmap_size_, file_size_ - file_offset_) bytes.
+ !filesystem_->PWrite(file_path_.c_str(), file_offset_, region(),
+ std::min(mmap_size_, file_size_ - file_offset_))) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to sync file using PWrite(): ", file_path_));
}
@@ -160,12 +228,162 @@ libtextclassifier3::Status MemoryMappedFile::OptimizeFor(
madvise_flag = MADV_SEQUENTIAL;
}
- if (madvise(mmap_result_, adjusted_mmap_size_, madvise_flag) != 0) {
+ if (madvise(mmap_result_, adjusted_mmap_size(), madvise_flag) != 0) {
return absl_ports::InternalError(absl_ports::StrCat(
"Unable to madvise file ", file_path_, "; Error: ", strerror(errno)));
}
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status MemoryMappedFile::GrowFileSize(
+ int64_t new_file_size) {
+ // Early return if new_file_size doesn't exceed the cached file size
+ // (file_size_). It saves a system call for getting the actual file size and
+ // reduces latency significantly.
+ if (new_file_size <= file_size_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ if (new_file_size > max_file_size_) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "new file size %" PRId64 " exceeds maximum file size allowed, %" PRId64
+ " bytes",
+ new_file_size, max_file_size_));
+ }
+
+ // Sync actual file size via system call.
+ int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (actual_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError("Unable to retrieve file size");
+ }
+ file_size_ = actual_file_size;
+
+ // Early return again if new_file_size doesn't exceed actual_file_size. It
+ // saves system calls for opening and closing file descriptor.
+ if (new_file_size <= actual_file_size) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ if (strategy_ == Strategy::READ_ONLY) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "Attempting to grow a read-only file: ", file_path_));
+ }
+
+  // We use Write here rather than Grow because Grow doesn't actually allocate
+  // an underlying disk block. This can lead to problems with mmap: an access
+  // through the mapping has no effective way to signal that the disk block
+  // could not be allocated, and the process crashes (SIGBUS) instead. Write
+  // forces the allocation of these blocks, ensuring that any failure to grow
+  // surfaces here.
+ int64_t page_size = system_page_size();
+ auto buf = std::make_unique<uint8_t[]>(page_size);
+ int64_t size_to_write = std::min(page_size - (file_size_ % page_size),
+ new_file_size - file_size_);
+ ScopedFd sfd(filesystem_->OpenForAppend(file_path_.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't open file ", file_path_));
+ }
+ while (size_to_write > 0 && file_size_ < new_file_size) {
+ if (!filesystem_->Write(sfd.get(), buf.get(), size_to_write)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't grow file ", file_path_));
+ }
+ file_size_ += size_to_write;
+ size_to_write = std::min(page_size - (file_size_ % page_size),
+ new_file_size - file_size_);
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
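A standalone sketch of the same grow-by-writing idea using raw POSIX calls rather than the icing Filesystem wrapper (all names here are illustrative): appending zeroed chunks forces block allocation, whereas ftruncate() alone would leave a sparse file whose pages can SIGBUS through an mmap when allocation later fails.

#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <cstdint>
#include <vector>

// Grows the file at path to at least new_size bytes by appending zeros,
// so that every byte is backed by an allocated disk block.
bool GrowWithZeroWrites(const char* path, int64_t new_size) {
  int fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0600);
  if (fd < 0) return false;
  struct stat st;
  if (fstat(fd, &st) < 0) {
    close(fd);
    return false;
  }
  int64_t size = st.st_size;
  std::vector<char> zeros(4096, 0);
  while (size < new_size) {
    int64_t chunk = std::min<int64_t>(zeros.size(), new_size - size);
    ssize_t written = write(fd, zeros.data(), chunk);
    if (written <= 0) {  // allocation failure surfaces here, not via SIGBUS
      close(fd);
      return false;
    }
    size += written;
  }
  close(fd);
  return true;
}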
+libtextclassifier3::Status MemoryMappedFile::RemapImpl(int64_t new_file_offset,
+ int64_t new_mmap_size) {
+ if (new_file_offset < 0) {
+ return absl_ports::OutOfRangeError("Invalid file offset");
+ }
+
+ if (new_mmap_size < 0) {
+ return absl_ports::OutOfRangeError("Invalid mmap size");
+ }
+
+ if (new_mmap_size == 0) {
+ // First unmap any previously mmapped region.
+ Unmap();
+ return libtextclassifier3::Status::OK;
+ }
+
+ int64_t new_aligned_offset =
+ math_util::RoundDownTo(new_file_offset, system_page_size());
+ int64_t new_alignment_adjustment = new_file_offset - new_aligned_offset;
+ int64_t new_adjusted_mmap_size = new_alignment_adjustment + new_mmap_size;
+
+ int mmap_flags = 0;
+ // Determines if the mapped region should just be readable or also writable.
+ int protection_flags = 0;
+ ScopedFd fd;
+ switch (strategy_) {
+ case Strategy::READ_ONLY: {
+ mmap_flags = MAP_PRIVATE;
+ protection_flags = PROT_READ;
+ fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
+ break;
+ }
+ case Strategy::READ_WRITE_AUTO_SYNC: {
+ mmap_flags = MAP_SHARED;
+ protection_flags = PROT_READ | PROT_WRITE;
+ fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
+ break;
+ }
+ case Strategy::READ_WRITE_MANUAL_SYNC: {
+ mmap_flags = MAP_PRIVATE;
+ protection_flags = PROT_READ | PROT_WRITE;
+ // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
+ // figure out if we can open this file in read-only mode.
+ fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
+ break;
+ }
+ default:
+ return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
+ "Invalid value in switch statement: %d", strategy_));
+ }
+
+ if (!fd.is_valid()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to open file meant to be mmapped: ", file_path_));
+ }
+
+ void* new_mmap_result =
+ mmap(nullptr, new_adjusted_mmap_size, protection_flags, mmap_flags,
+ fd.get(), new_aligned_offset);
+
+ if (new_mmap_result == MAP_FAILED) {
+ new_mmap_result = nullptr;
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to mmap region due to error: ", strerror(errno)));
+ }
+
+ // Now we know that we have successfully created a new mapping. We can free
+ // the old one and switch to the new one.
+ Unmap();
+
+ mmap_result_ = new_mmap_result;
+ file_offset_ = new_file_offset;
+ mmap_size_ = new_mmap_size;
+ alignment_adjustment_ = new_alignment_adjustment;
+ return libtextclassifier3::Status::OK;
+}
+
+void MemoryMappedFile::Swap(MemoryMappedFile* other) {
+ std::swap(filesystem_, other->filesystem_);
+ std::swap(file_path_, other->file_path_);
+ std::swap(strategy_, other->strategy_);
+ std::swap(max_file_size_, other->max_file_size_);
+ std::swap(file_size_, other->file_size_);
+ std::swap(mmap_result_, other->mmap_result_);
+ std::swap(file_offset_, other->file_offset_);
+ std::swap(mmap_size_, other->mmap_size_);
+ std::swap(alignment_adjustment_, other->alignment_adjustment_);
+}
+
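The move operations defined earlier rely on this Swap: the move constructor nulls only the member whose destructor has a side effect (mmap_result_) and then swaps wholesale. A minimal sketch of the same idiom on a simplified owning type (Buffer is hypothetical, not icing code):

#include <cstdlib>
#include <utility>

// Swap-based move idiom: the moved-to object first nulls its owning pointer
// so that, after the swap, the moved-from object's destructor frees nothing
// it doesn't own.
class Buffer {
 public:
  explicit Buffer(std::size_t n) : data_(std::malloc(n)), size_(n) {}
  Buffer(Buffer&& other) : data_(nullptr) { Swap(&other); }
  Buffer& operator=(Buffer&& other) {
    Swap(&other);  // other frees our old allocation when it is destroyed
    return *this;
  }
  ~Buffer() { std::free(data_); }  // free(nullptr) is a harmless no-op

 private:
  void Swap(Buffer* other) {
    std::swap(data_, other->data_);
    std::swap(size_, other->size_);
  }

  void* data_;
  std::size_t size_ = 0;
};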
} // namespace lib
} // namespace icing
diff --git a/icing/file/memory-mapped-file.h b/icing/file/memory-mapped-file.h
index 5a52368..54507af 100644
--- a/icing/file/memory-mapped-file.h
+++ b/icing/file/memory-mapped-file.h
@@ -21,18 +21,54 @@
// faster reads as well as background-sync vs manual-sync of changes to disk.
// For more details, see comments at MemoryMappedFile::Strategy.
//
-// Usage:
+// ** Usage 1: pre-mmap large memory and grow the underlying file internally **
//
-// MemoryMappedFile mmapped_file(filesystem, "/file.pb", READ_WRITE_AUTO_SYNC));
-// mmapped_file->Remap(0, 16* 1024); // load the first 16K of the file.
+// // Create MemoryMappedFile instance.
+// ICING_ASSIGN_OR_RETURN(
+// std::unique_ptr<MemoryMappedFile> mmapped_file,
+// MemoryMappedFile::Create(filesystem, "/file.pb",
+// READ_WRITE_AUTO_SYNC,
+// max_file_size,
+// /*pre_mapping_file_offset=*/0,
+// /*pre_mapping_mmap_size=*/1024 * 1024));
//
+// // Found that we need 4K bytes for the file and mmapped region.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024);
+// char read_byte = mmapped_file->region()[4000];
+// mmapped_file->mutable_region()[4001] = write_byte;
+//
+// mmapped_file->PersistToDisk(); // Optional; immediately writes changes to
+// disk.
+//
+// // Found that we need 2048 * 1024 bytes for the file and mmapped region.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024);
+// mmapped_file->mutable_region()[2000 * 1024] = write_byte;
+// mmapped_file.reset();
+//
+// ** Usage 2: load by segments **
+//
+// ICING_ASSIGN_OR_RETURN(
+// std::unique_ptr<MemoryMappedFile> mmapped_file,
+// MemoryMappedFile::Create(filesystem, "/file.pb",
+// READ_WRITE_AUTO_SYNC,
+// max_file_size,
+// /*pre_mapping_file_offset=*/0,
+// /*pre_mapping_mmap_size=*/16 * 1024));
+//
+// // load the first 16K.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024);
// char read_byte = mmapped_file->region()[100];
// mmapped_file->mutable_region()[10] = write_byte;
//
// mmapped_file->PersistToDisk(); // Optional; immediately writes changes to
// disk.
//
-// mmapped_file->Remap(16*1024, 16* 1024); // load the next 16K.
+// // load the next 16K.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024);
// mmapped_file->mutable_region()[10] = write_byte;
// mmapped_file.reset();
@@ -41,12 +77,14 @@
#include <unistd.h>
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/file/filesystem.h"
namespace icing {
@@ -54,8 +92,9 @@ namespace lib {
class MemoryMappedFile {
public:
- static size_t __attribute__((const)) system_page_size() {
- static const size_t page_size = sysconf(_SC_PAGE_SIZE);
+ static int64_t __attribute__((const)) system_page_size() {
+ static const int64_t page_size =
+ static_cast<int64_t>(sysconf(_SC_PAGE_SIZE));
return page_size;
}
@@ -71,23 +110,95 @@ class MemoryMappedFile {
// Memory map a read-write file into a writable memory region. Changes made
// to this region will never be auto-synced to the underlying file. Unless
// the caller explicitly calls PersistToDisk(), all changes will be lost
- // when the
- // MemoryMappedFile is destroyed.
+ // when the MemoryMappedFile is destroyed.
READ_WRITE_MANUAL_SYNC,
};
- // file_path : Full path of the file that needs to be memory-mapped.
- MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path,
- Strategy mmap_strategy);
+ // Absolute max file size, 16 GiB.
+ static constexpr int64_t kMaxFileSize = INT64_C(1) << 34;
+
+ // Default max file size, 1 MiB.
+ static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20;
+
+ // Creates a new MemoryMappedFile to read/write content to.
+ //
+ // filesystem : Object to make system level calls
+ // file_path : Full path of the file that needs to be memory-mapped.
+ // mmap_strategy : Strategy/optimizations to access the content.
+ // max_file_size : Maximum file size for MemoryMappedFile, default
+ // kDefaultMaxFileSize.
+ //
+ // Returns:
+ // A MemoryMappedFile instance on success
+ // OUT_OF_RANGE_ERROR if max_file_size is invalid
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
+ const Filesystem& filesystem, std::string_view file_path,
+ Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize);
+
+ // Creates a new MemoryMappedFile to read/write content to. It remaps when
+ // creating the instance, but doesn't check or grow the actual file size, so
+ // the caller should call GrowAndRemapIfNecessary before accessing region.
+ //
+ // filesystem : Object to make system level calls
+ // file_path : Full path of the file that needs to be memory-mapped.
+ // mmap_strategy : Strategy/optimizations to access the content.
+ // max_file_size : Maximum file size for MemoryMappedFile.
+ // pre_mapping_file_offset : The offset of the file to be memory mapped.
+ // pre_mapping_mmap_size : mmap size for pre-mapping.
+ //
+ // Returns:
+ // A MemoryMappedFile instance on success
+ // OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
+ const Filesystem& filesystem, std::string_view file_path,
+ Strategy mmap_strategy, int64_t max_file_size,
+ int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size);
+
+ // Delete copy constructor and assignment operator.
+ MemoryMappedFile(const MemoryMappedFile& other) = delete;
+ MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete;
+
+ MemoryMappedFile(MemoryMappedFile&& other);
+ MemoryMappedFile& operator=(MemoryMappedFile&& other);
  // Frees any region that is still memory-mapped.
~MemoryMappedFile();
+ // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and
+ // deprecate this API.
+ //
// Memory-map the newly specified region within the file specified by
// file_offset and mmap_size. Unmaps any previously mmapped region.
+ // It doesn't handle the underlying file growth.
//
// Returns any encountered IO error.
- libtextclassifier3::Status Remap(size_t file_offset, size_t mmap_size);
+ libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size);
+
+ // Attempt to memory-map the newly specified region within the file specified
+ // by new_file_offset and new_mmap_size. It handles mmap and file growth
+ // intelligently.
+  // - Compute the least file size needed according to new_file_offset and
+  //   new_mmap_size, and compare it with the current file size. If file growth
+  //   is required, grow the underlying file (Write), or return an error if
+  //   strategy_ is READ_ONLY.
+ // - If new_file_offset is different from the current file_offset_ or
+ // new_mmap_size is greater than the current mmap_size_, then memory-map
+ // the newly specified region and unmap any previously mmapped region.
+ //
+  // This API is useful for file growth since it grows the underlying file
+  // internally and handles remapping intelligently. By pre-mmapping a large
+  // region, we only need to grow the underlying file (Write) in each round of
+  // growth, without remapping, which significantly reduces the cost of system
+  // calls and of memory paging after a remap.
+ //
+ // Returns:
+ // OK on success
+  //   OUT_OF_RANGE_ERROR if new_file_offset or new_mmap_size is invalid
+ // Any error from GrowFileSize() and RemapImpl()
+ libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset,
+ int64_t new_mmap_size);
  // Unmaps and frees up the region that is currently memory-mapped.
void Unmap();
@@ -126,32 +237,147 @@ class MemoryMappedFile {
};
libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern);
+ Strategy strategy() const { return strategy_; }
+
+ int64_t max_file_size() const { return max_file_size_; }
+
// Accessors to the memory-mapped region. Returns null if nothing is mapped.
- const char* region() const { return region_; }
- char* mutable_region() { return region_; }
+ const char* region() const {
+ return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_;
+ }
+ char* mutable_region() {
+ return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_;
+ }
- size_t region_size() const { return region_size_; }
- Strategy strategy() const { return strategy_; }
+ int64_t file_offset() const { return file_offset_; }
+
+ // TODO(b/247671531): remove this API after migrating all callers to use
+ // GrowAndRemapIfNecessary.
+ int64_t region_size() const { return mmap_size_; }
+
+ // The size that is safe for the client to read/write. This is only valid for
+ // callers that use GrowAndRemapIfNecessary.
+ int64_t available_size() const {
+ return std::min(mmap_size_,
+ std::max(INT64_C(0), file_size_ - file_offset_));
+ }
private:
+ explicit MemoryMappedFile(const Filesystem& filesystem,
+ std::string_view file_path, Strategy mmap_strategy,
+ int64_t max_file_size, int64_t file_size);
+
+ // Grow the underlying file to new_file_size.
+  // Note: it is possible for Write() (implemented in the file system call
+  // library) to grow the underlying file only partially and return an error,
+  // so the cached file_size_ may hold an out-of-date value; still, it is
+  // guaranteed that file_size_ is always smaller than or equal to the actual
+  // file size. In the next round of growing:
+ // - If new_file_size is not greater than file_size_, then we're still
+ // confident that the actual file size is large enough and therefore skip
+ // the grow process.
+ // - If new_file_size is greater than file_size_, then we will invoke the
+ // system call to sync the actual file size. At this moment, file_size_ is
+ // the actual file size and therefore we can grow the underlying file size
+ // correctly.
+ //
+ // Returns:
+ // OK on success
+ // FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is
+ // READ_ONLY
+  //   OUT_OF_RANGE_ERROR if new_file_size exceeds max_file_size_
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status GrowFileSize(int64_t new_file_size);
+
+ // Memory-map the newly specified region within the file specified by
+ // new_file_offset and new_mmap_size. Unmaps any previously mmapped region.
+ // It doesn't handle the underlying file growth.
+ //
+ // Returns:
+ // OK on success
+  //   OUT_OF_RANGE_ERROR if new_file_offset or new_mmap_size is invalid
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status RemapImpl(int64_t new_file_offset,
+ int64_t new_mmap_size);
+
+ // Swaps the contents of this with other.
+ void Swap(MemoryMappedFile* other);
+
+ int64_t adjusted_offset() const {
+ return file_offset_ - alignment_adjustment_;
+ }
+
+ int64_t adjusted_mmap_size() const {
+ return alignment_adjustment_ + mmap_size_;
+ }
+
// Cached constructor params.
- const Filesystem* const filesystem_;
- const std::string file_path_;
- const Strategy strategy_;
+ const Filesystem* filesystem_;
+ std::string file_path_;
+ Strategy strategy_;
- // Offset within the file at which the current memory-mapped region starts.
- size_t file_offset_ = 0;
+ // Raw file related fields:
+ // - max_file_size_
+ // - file_size_
+
+ // Max file size for MemoryMappedFile. It should not exceed the absolute max
+ // size of memory mapped file (kMaxFileSize). It is only used in
+ // GrowAndRemapIfNecessary(), the new API that handles underlying file growth
+ // internally and remaps intelligently.
+ //
+ // Note: max_file_size_ will be specified in runtime and the caller should
+ // make sure its value is correct and reasonable.
+ int64_t max_file_size_;
- // Region that is currently memory-mapped.
- char* region_ = nullptr;
- size_t region_size_ = 0;
+ // Cached file size to avoid calling system call too frequently. It is only
+ // used in GrowAndRemapIfNecessary(), the new API that handles underlying file
+ // growth internally and remaps intelligently.
+ //
+  // Note: it is guaranteed that file_size_ is smaller than or equal to the
+  // actual file size, as long as the underlying file hasn't been truncated or
+  // deleted externally. See GrowFileSize() for more details.
+ int64_t file_size_;
- // The actual size of the region we mmapped. As the requested region might not
- // align with system pages, we often mmap more bytes than requested.
- size_t adjusted_mmap_size_ = 0;
+ // Memory mapped related fields:
+ // - mmap_result_
+ // - file_offset_
+ // - alignment_adjustment_
+ // - mmap_size_
// Raw pointer (or error) returned by calls to mmap().
- void* mmap_result_ = nullptr;
+ void* mmap_result_;
+
+ // Offset within the file at which the current memory-mapped region starts.
+ int64_t file_offset_;
+
+ // Size that is currently memory-mapped.
+  // Note that the mmapped size can be larger than the underlying file size. We
+  // can reduce remapping by pre-mmapping a large region and growing the file
+  // size later. See GrowAndRemapIfNecessary().
+ int64_t mmap_size_;
+
+  // The difference between file_offset_ and the actual adjusted (aligned)
+  // offset.
+  // Since mmap requires the offset to be a multiple of the system page size,
+  // we round file_offset_ down to the nearest page boundary.
+ int64_t alignment_adjustment_;
+
+ // E.g. system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size)
+ //
+ // File layout: xxxxx xxxxx xxxxx xxxxx xxxxx xx
+ // file_offset_: 8
+ // adjusted_offset(): 5
+ // region()/mutable_region(): |
+ // mmap_result_: |
+ //
+ // alignment_adjustment_: file_offset_ - adjusted_offset()
+ // mmap_size_: mmap_size
+ // region_size(): mmap_size_
+ // available_size(): std::min(mmap_size_,
+ // std::max(0, file_size_ - file_offset_))
+  //   region_range:              [file_offset_, file_offset_ + mmap_size_)
+  //   adjusted_mmap_size():      alignment_adjustment_ + mmap_size_
+  //   adjusted_mmap_range:       [adjusted_offset(), file_offset_ + mmap_size_)
};
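Plugging the diagram's numbers into the accessors, as a quick sanity check (system_page_size = 5, file_offset_ = 8, a 27-byte file, and an assumed mmap_size_ of 12):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int64_t page_size = 5;    // from the example above
  const int64_t file_size = 27;   // "xxxxx xxxxx xxxxx xxxxx xxxxx xx"
  const int64_t file_offset = 8;
  const int64_t mmap_size = 12;   // assumed for this check

  const int64_t adjusted_offset = (file_offset / page_size) * page_size;
  const int64_t alignment_adjustment = file_offset - adjusted_offset;
  const int64_t adjusted_mmap_size = alignment_adjustment + mmap_size;
  const int64_t available_size =
      std::min(mmap_size, std::max(INT64_C(0), file_size - file_offset));

  assert(adjusted_offset == 5 && alignment_adjustment == 3);
  assert(adjusted_mmap_size == 15);
  assert(available_size == 12);  // capped by mmap_size, not by the file
  return 0;
}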
} // namespace lib
diff --git a/icing/file/memory-mapped-file_test.cc b/icing/file/memory-mapped-file_test.cc
new file mode 100644
index 0000000..16f76e6
--- /dev/null
+++ b/icing/file/memory-mapped-file_test.cc
@@ -0,0 +1,668 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/memory-mapped-file.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+using ::testing::DoDefault;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::IsNull;
+using ::testing::Le;
+using ::testing::Not;
+using ::testing::NotNull;
+using ::testing::Return;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class MemoryMappedFileTest : public ::testing::Test {
+ protected:
+ void SetUp() override { file_path_ = GetTestTempDir() + "/mmap_test_file"; }
+
+ void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
+
+ const Filesystem& filesystem() const { return filesystem_; }
+
+ Filesystem filesystem_;
+ std::string file_path_;
+};
+
+TEST_F(MemoryMappedFileTest, Create) {
+ constexpr int max_file_size = 8192;
+  MemoryMappedFile::Strategy strategy =
+      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+  // Create MemoryMappedFile
+  ICING_ASSERT_OK_AND_ASSIGN(MemoryMappedFile mmapped_file,
+                             MemoryMappedFile::Create(filesystem_, file_path_,
+                                                      strategy, max_file_size));
+
+  EXPECT_THAT(mmapped_file.strategy(), Eq(strategy));
+ EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+ EXPECT_THAT(mmapped_file.mutable_region(), IsNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(0));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+}
+
+TEST_F(MemoryMappedFileTest, CreateFromExistingFile) {
+ int init_file_size = 100;
+ {
+ // Initialize file
+ ScopedFd sfd(filesystem_.OpenForWrite(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ auto buf = std::make_unique<char[]>(init_file_size);
+ ASSERT_TRUE(filesystem_.Write(sfd.get(), buf.get(), init_file_size));
+ }
+
+ constexpr int max_file_size = 8192;
+  MemoryMappedFile::Strategy strategy =
+      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+  // Create MemoryMappedFile from an existing file
+  ICING_ASSERT_OK_AND_ASSIGN(MemoryMappedFile mmapped_file,
+                             MemoryMappedFile::Create(filesystem_, file_path_,
+                                                      strategy, max_file_size));
+
+  EXPECT_THAT(mmapped_file.strategy(), Eq(strategy));
+ EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+ EXPECT_THAT(mmapped_file.mutable_region(), IsNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(0));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+}
+
+TEST_F(MemoryMappedFileTest, CreateWithInvalidMaxFileSize) {
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/MemoryMappedFile::kMaxFileSize + 1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/-1, /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/8192),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/MemoryMappedFile::kMaxFileSize + 1,
+ /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/8192),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(MemoryMappedFileTest, CreateWithPreMappingInfo) {
+ constexpr int max_file_size = 8192;
+ constexpr int pre_mapping_file_offset = 99;
+ constexpr int pre_mapping_mmap_size = 2000;
+  MemoryMappedFile::Strategy strategy =
+      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+  // Create MemoryMappedFile with pre-mapping file_offset and mmap_size
+  ICING_ASSERT_OK_AND_ASSIGN(
+      MemoryMappedFile mmapped_file,
+      MemoryMappedFile::Create(filesystem_, file_path_, strategy, max_file_size,
+                               pre_mapping_file_offset, pre_mapping_mmap_size));
+
+  EXPECT_THAT(mmapped_file.strategy(), Eq(strategy));
+ EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size));
+ EXPECT_THAT(mmapped_file.region(), NotNull());
+ EXPECT_THAT(mmapped_file.mutable_region(), NotNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+
+ // Manually grow the file externally and mutate region. There should be no
+ // memory error.
+ {
+ ScopedFd sfd(filesystem_.OpenForAppend(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int grow_size = 4096;
+ auto buf = std::make_unique<char[]>(grow_size);
+ ASSERT_TRUE(filesystem_.Write(sfd.get(), buf.get(), grow_size));
+ }
+ mmapped_file.mutable_region()[0] = 'a';
+ ICING_EXPECT_OK(mmapped_file.PersistToDisk());
+
+ {
+ ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int buf_size = 10;
+ auto buf = std::make_unique<char[]>(buf_size);
+ ASSERT_TRUE(filesystem_.PRead(sfd.get(), buf.get(), buf_size,
+ pre_mapping_file_offset));
+ EXPECT_THAT(buf.get()[0], Eq('a'));
+ }
+}
+
+TEST_F(MemoryMappedFileTest, CreateWithInvalidPreMappingInfo) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 2;
+
+ // Negative file_offset
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/-1,
+ /*pre_mapping_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Negative mmap_size
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // pre_mapping_file_offset + pre_mapping_mmap_size > max_file_size.
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = max_file_size - pre_mapping_file_offset + 1;
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, pre_mapping_file_offset,
+ pre_mapping_mmap_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Edge cases to make sure the implementation of range check won't have
+ // integer overflow bug.
+ EXPECT_THAT(
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ /*pre_mapping_file_offset=*/99,
+ /*pre_mapping_mmap_size=*/std::numeric_limits<int64_t>::max()),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size)),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ /*pre_mapping_file_offset=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size),
+ /*pre_mapping_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+// TODO(b/247671531): remove this test after deprecating Remap
+TEST_F(MemoryMappedFileTest, RemapZeroMmapSizeShouldUnmap) {
+ // Create MemoryMappedFile
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize));
+
+ int page_size = MemoryMappedFile::system_page_size();
+ int file_offset = 99;
+ int mmap_size = page_size * 2 - file_offset;
+ ICING_ASSERT_OK(mmapped_file.Remap(file_offset, mmap_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+
+  // Call Remap with any file_offset and mmap_size = 0. The original mmapped
+  // region should be unmapped.
+ ICING_EXPECT_OK(mmapped_file.Remap(file_offset, /*mmap_size=*/0));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessary) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ {
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size
+ // without growing the file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize, pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+ ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(0));
+ const char* original_region = mmapped_file.region();
+
+    // Call GrowAndRemapIfNecessary with the same file_offset and a new
+    // mmap_size that doesn't exceed pre_mapping_mmap_size. The underlying file
+    // size should grow correctly, but there should be no remap.
+ int new_mmap_size1 = page_size - pre_mapping_file_offset;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(
+ pre_mapping_file_offset, new_mmap_size1));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(pre_mapping_file_offset + new_mmap_size1));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size1));
+
+ // Test it with new_mmap_size2 = pre_mapping_mmap_size
+ int new_mmap_size2 = pre_mapping_mmap_size;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(
+ pre_mapping_file_offset, new_mmap_size2));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(pre_mapping_file_offset + new_mmap_size2));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size2));
+
+ // Write some bytes to region()[0]. It should write the underlying file at
+ // file_offset.
+ mmapped_file.mutable_region()[0] = 'a';
+ ICING_ASSERT_OK(mmapped_file.PersistToDisk());
+ }
+
+ ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int buf_size = 1;
+ auto buf = std::make_unique<char[]>(buf_size);
+ ASSERT_TRUE(filesystem_.PRead(sfd.get(), buf.get(), buf_size,
+ pre_mapping_file_offset));
+ EXPECT_THAT(buf.get()[0], Eq('a'));
+}
+
+TEST_F(MemoryMappedFileTest,
+ GrowAndRemapIfNecessaryExceedingPreMappingMmapSize) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size without
+ // growing the file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ const char* original_region = mmapped_file.region();
+
+  // Call GrowAndRemapIfNecessary with the same file offset and a new mmap_size
+  // that exceeds pre_mapping_mmap_size (but is still below max_file_size). The
+  // underlying file size should grow correctly and the region should be
+  // remapped.
+ int new_mmap_size = page_size * 3 - pre_mapping_file_offset;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ new_mmap_size));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(pre_mapping_file_offset + new_mmap_size));
+ EXPECT_THAT(mmapped_file.region(), Not(Eq(original_region)));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(new_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryDecreasingMmapSize) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+
+ const char* original_region = mmapped_file.region();
+ int original_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(original_file_size,
+ Eq(pre_mapping_file_offset + pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+
+  // Call GrowAndRemapIfNecessary with the same file offset and a new mmap_size
+  // smaller than pre_mapping_mmap_size. There should be no file
+  // growth/truncation or remap.
+ int new_mmap_size = page_size - pre_mapping_file_offset;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ new_mmap_size));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(original_file_size));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryZeroMmapSizeShouldUnmap) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+
+ int original_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(original_file_size,
+ Eq(pre_mapping_file_offset + pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+ ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+
+ // Call GrowAndRemapIfNecessary with any file_offset and new_mmap_size = 0.
+ // There should be no file growth/truncate, but the original mmapped region
+ // should be unmapped.
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ /*new_mmap_size=*/0));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(original_file_size));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(0));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryChangeOffset) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+
+ const char* original_region = mmapped_file.region();
+ int original_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(original_file_size,
+ Eq(pre_mapping_file_offset + pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+
+ // Call GrowAndRemapIfNecessary with different file_offset and new mmap_size
+ // that doesn't require to grow the underlying file. The region should still
+ // be remapped since offset has been changed.
+ int new_file_offset = pre_mapping_file_offset + page_size;
+ int new_mmap_size = page_size * 2 - new_file_offset;
+ ASSERT_THAT(new_file_offset + new_mmap_size, Le(original_file_size));
+ ICING_EXPECT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(new_file_offset, new_mmap_size));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(original_file_size));
+ EXPECT_THAT(mmapped_file.region(), Not(Eq(original_region)));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(new_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(new_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryInvalidMmapRegionInfo) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 2;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/page_size * 2));
+
+ // Negative new_file_offset.
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/-1,
+ /*new_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Negative new_mmap_size.
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/0,
+ /*new_mmap_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // new_file_offset + new_mmap_size > max_file_size.
+ int new_file_offset = 99;
+ int new_mmap_size = max_file_size - new_file_offset + 1;
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(new_file_offset, new_mmap_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Edge cases to make sure the range check implementation doesn't have an
+ // integer overflow bug.
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/99,
+ /*new_mmap_size=*/std::numeric_limits<int64_t>::max()),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/0,
+ /*new_mmap_size=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size)),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size),
+ /*new_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(MemoryMappedFileTest, RemapFailureStillValidInstance) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 10;
+
+ // 1. Create MemoryMappedFile with pre-mapping offset=0 and
+ // mmap_size=page_size. Also call GrowAndRemapIfNecessary to grow the file
+ // size to page_size.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/page_size));
+ ICING_ASSERT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size));
+ ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+ ASSERT_THAT(mmapped_file.mutable_region(), NotNull());
+ ASSERT_THAT(mmapped_file.file_offset(), Eq(0));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(page_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(page_size));
+ mmapped_file.mutable_region()[page_size - 1] = 'a';
+
+ const char* original_region = mmapped_file.region();
+
+ // 2. Call GrowAndRemapIfNecessary with a different offset and a greater
+ // mmap_size. This tests the case where file growth succeeds but the remap
+ // (RemapImpl) fails.
+ // To make RemapImpl fail, mock OpenForWrite to fail. Note that
+ // OpenForAppend is used when growing the file, so making OpenForWrite fail
+ // doesn't affect file growth.
+ ON_CALL(*mock_filesystem, OpenForWrite(_)).WillByDefault(Return(-1));
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/1,
+ /*new_mmap_size=*/page_size * 2 - 1),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+
+ // 3. Verify the result. The file size should have grown, but since the
+ // remap failed, mmap-related fields should remain unchanged.
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size * 2));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(page_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size));
+ // We should still be able to get the correct content via region.
+ EXPECT_THAT(mmapped_file.region()[page_size - 1], Eq('a'));
+}
+
+TEST_F(MemoryMappedFileTest, BadFileSizeDuringGrowReturnsError) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 10;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/page_size));
+ ICING_ASSERT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size));
+ ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+ ASSERT_THAT(mmapped_file.mutable_region(), NotNull());
+ ASSERT_THAT(mmapped_file.file_offset(), Eq(0));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(page_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(page_size));
+ mmapped_file.mutable_region()[page_size - 1] = 'a';
+
+ const char* original_region = mmapped_file.region();
+
+ // Calling GrowAndRemapIfNecessary with a larger size will cause file
+ // growth. During file growth, we attempt to sync the underlying file size
+ // via GetFileSize to see if growing is actually necessary. Mock GetFileSize
+ // to return an error.
+ ON_CALL(*mock_filesystem, GetFileSize(A<const char*>()))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
+
+ // We should fail gracefully and return an INTERNAL error to indicate that
+ // there was an issue retrieving the file size. The underlying file size and
+ // mmap info should remain unchanged.
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size * 2),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(page_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size));
+ // We should still be able to get the correct content via region.
+ EXPECT_THAT(mmapped_file.region()[page_size - 1], Eq('a'));
+}
+
+TEST_F(MemoryMappedFileTest, WriteSucceedsPartiallyAndFailsDuringGrow) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 10;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/max_file_size));
+
+ // 1. Initially the underlying file size is 0. When calling
+ // GrowAndRemapIfNecessary the first time with new_mmap_size = page_size *
+ // 2, Write() should be called twice, and each call should grow the
+ // underlying file by page_size bytes.
+ // Mock the 2nd Write() to write partially (1 byte) and then fail, so the
+ // file will only be grown by page_size + 1 bytes in total.
+ auto write_lambda = [this](int fd, const void* data,
+ size_t data_size) -> bool {
+ EXPECT_THAT(data_size, Gt(1));
+ EXPECT_THAT(this->filesystem_.Write(fd, data, 1), Eq(1));
+ return false;
+ };
+ EXPECT_CALL(*mock_filesystem, Write(A<int>(), A<const void*>(), A<size_t>()))
+ .WillOnce(DoDefault())
+ .WillOnce(write_lambda);
+
+ // 2. Call GrowAndRemapIfNecessary and expect it to fail. The actual file
+ // size should be page_size + 1, but the (cached) file_size_ should be
+ // page_size, since the partially written byte of the 2nd Write() is not
+ // reflected in the cached value.
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size * 2),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size + 1));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size));
+
+ // 3. Call GrowAndRemapIfNecessary again with new_mmap_size = page_size + 1.
+ // Even though file_size_ only caches page_size and excludes the byte(s)
+ // partially written during the failed previous round of growth, the next
+ // round should sync the actual file size into file_size_ via a system call
+ // and skip Write(), since the actual file size is already large enough for
+ // the new mmap_size.
+ // Note: the WillOnce() expectations above ensure that Write() won't be
+ // called again.
+ ICING_EXPECT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size + 1));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size + 1));
+
+ // 4. Call GrowAndRemapIfNecessary again with new_mmap_size = page_size * 2.
+ // Even though the current file size is page_size + 1, the next round of
+ // growth should automatically calibrate the file size back to a multiple of
+ // page_size instead of simply appending page_size bytes to the file.
+ EXPECT_CALL(*mock_filesystem, Write(A<int>(), A<const void*>(), A<size_t>()))
+ .WillOnce(DoDefault());
+ ICING_EXPECT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size * 2));
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size * 2));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size * 2));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/mock-filesystem.h b/icing/file/mock-filesystem.h
index b89295e..32817d4 100644
--- a/icing/file/mock-filesystem.h
+++ b/icing/file/mock-filesystem.h
@@ -44,6 +44,17 @@ class MockFilesystem : public Filesystem {
return real_filesystem_.DeleteDirectoryRecursively(dir_name);
});
+ ON_CALL(*this, CopyFile)
+ .WillByDefault([this](const char* src, const char* dst) {
+ return real_filesystem_.CopyFile(src, dst);
+ });
+
+ ON_CALL(*this, CopyDirectory)
+ .WillByDefault(
+ [this](const char* src, const char* dst, bool recursive) {
+ return real_filesystem_.CopyDirectory(src, dst, recursive);
+ });
+
ON_CALL(*this, FileExists).WillByDefault([this](const char* file_name) {
return real_filesystem_.FileExists(file_name);
});
@@ -225,6 +236,11 @@ class MockFilesystem : public Filesystem {
MOCK_METHOD(bool, DeleteDirectoryRecursively, (const char* dir_name),
(const));
+ MOCK_METHOD(bool, CopyFile, (const char* src, const char* dst), (const));
+
+ MOCK_METHOD(bool, CopyDirectory,
+ (const char* src, const char* dst, bool recursive), (const));
+
MOCK_METHOD(bool, FileExists, (const char* file_name), (const));
MOCK_METHOD(bool, DirectoryExists, (const char* dir_name), (const));
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
new file mode 100644
index 0000000..6936c45
--- /dev/null
+++ b/icing/file/persistent-hash-map.cc
@@ -0,0 +1,750 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/persistent-hash-map.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to check that the key contains no termination character
+// '\0'.
+libtextclassifier3::Status ValidateKey(std::string_view key) {
+ if (key.find('\0') != std::string_view::npos) { // NOLINT
+ return absl_ports::InvalidArgumentError(
+ "Key cannot contain termination character '\\0'");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+// Helper function to hash the key to a bucket index.
+//
+// Returns:
+// int32_t: A valid bucket index with range [0, num_buckets - 1].
+// INTERNAL_ERROR if num_buckets == 0
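+//
+// For example (illustrative numbers): with num_buckets = 8, a key hashing to
+// 27 lands in bucket 27 % 8 = 3.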
+libtextclassifier3::StatusOr<int32_t> HashKeyToBucketIndex(
+ std::string_view key, int32_t num_buckets) {
+ if (num_buckets == 0) {
+ return absl_ports::InternalError("Should not have empty bucket");
+ }
+ return static_cast<int32_t>(std::hash<std::string_view>()(key) % num_buckets);
+}
+
+// The following 4 methods are helper functions to get the correct path of
+// metadata/bucket/entry/key-value storages, according to the given working
+// directory path.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".m");
+}
+
+std::string GetBucketStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".b");
+}
+
+std::string GetEntryStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".e");
+}
+
+std::string GetKeyValueStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".k");
+}
+
+// Calculates how many buckets we need given num_entries and
+// max_load_factor_percent, rounding the result up to a power of 2.
+//
+// REQUIRES: 0 < num_entries <= Entry::kMaxNumEntries &&
+// max_load_factor_percent > 0
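+//
+// For example (illustrative numbers): num_entries = 1000 and
+// max_load_factor_percent = 100 give ceil(1000 * 100 / 100) = 1000, which is
+// not a power of 2, so the result is rounded up to 1 << 10 = 1024.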
+int32_t CalculateNumBucketsRequired(int32_t num_entries,
+ int32_t max_load_factor_percent) {
+ // Calculate ceil(num_entries * 100 / max_load_factor_percent)
+ int32_t num_entries_100 = num_entries * 100;
+ int32_t num_buckets_required =
+ num_entries_100 / max_load_factor_percent +
+ (num_entries_100 % max_load_factor_percent == 0 ? 0 : 1);
+ if ((num_buckets_required & (num_buckets_required - 1)) != 0) {
+ // Not a power of 2: round up to the next power of 2.
+ return 1 << (32 - __builtin_clz(num_buckets_required));
+ }
+ return num_buckets_required;
+}
+
+} // namespace
+
+bool PersistentHashMap::Options::IsValid() const {
+ if (!(value_type_size > 0 && value_type_size <= kMaxValueTypeSize &&
+ max_num_entries > 0 && max_num_entries <= Entry::kMaxNumEntries &&
+ max_load_factor_percent > 0 && average_kv_byte_size > 0 &&
+ init_num_buckets > 0 && init_num_buckets <= Bucket::kMaxNumBuckets)) {
+ return false;
+ }
+
+ // We've ensured (static_assert) that storing kMaxNumBuckets buckets won't
+ // exceed FileBackedVector::kMaxFileSize, so only need to verify # of buckets
+ // required won't exceed kMaxNumBuckets.
+ if (CalculateNumBucketsRequired(max_num_entries, max_load_factor_percent) >
+ Bucket::kMaxNumBuckets) {
+ return false;
+ }
+
+ // Verify # of key value pairs can fit into kv_storage.
+ if (average_kv_byte_size > kMaxKVTotalByteSize / max_num_entries) {
+ return false;
+ }
+
+ // Verify init_num_buckets is 2's power. Requiring init_num_buckets to be 2^n
+ // guarantees that num_buckets will eventually grow to be exactly
+ // max_num_buckets since CalculateNumBucketsRequired rounds it up to 2^n.
+ if ((init_num_buckets & (init_num_buckets - 1)) != 0) {
+ return false;
+ }
+
+ return true;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+PersistentHashMap::Create(const Filesystem& filesystem,
+ std::string working_path, Options options) {
+ if (!options.IsValid()) {
+ return absl_ports::InvalidArgumentError(
+ "Invalid PersistentHashMap options");
+ }
+
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetBucketStorageFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetEntryStorageFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetKeyValueStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any of these files is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ std::move(options));
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ std::move(options));
+}
+
+PersistentHashMap::~PersistentHashMap() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist hash map to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
+ const void* value) {
+ SetDirty();
+
+ ICING_RETURN_IF_ERROR(ValidateKey(key));
+ ICING_ASSIGN_OR_RETURN(
+ int32_t bucket_idx,
+ HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
+
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
+ FindEntryIndexByKey(bucket_idx, key));
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
+ // If not found, then insert new key value pair.
+ return Insert(bucket_idx, key, value);
+ }
+
+ // Otherwise, overwrite the value.
+ ICING_ASSIGN_OR_RETURN(const Entry* entry,
+ entry_storage_->Get(idx_pair.target_entry_index));
+
+ int32_t kv_len = key.length() + 1 + info().value_type_size;
+ int32_t value_offset = key.length() + 1;
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<char>::MutableArrayView mutable_kv_arr,
+ kv_storage_->GetMutable(entry->key_value_index(), kv_len));
+ // It is the same key and value_size is fixed, so we can directly overwrite
+ // serialized value.
+ mutable_kv_arr.SetArray(value_offset, reinterpret_cast<const char*>(value),
+ info().value_type_size);
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PersistentHashMap::GetOrPut(std::string_view key,
+ void* next_value) {
+ ICING_RETURN_IF_ERROR(ValidateKey(key));
+ ICING_ASSIGN_OR_RETURN(
+ int32_t bucket_idx,
+ HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
+
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
+ FindEntryIndexByKey(bucket_idx, key));
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
+ // If not found, then insert new key value pair.
+ SetDirty();
+ return Insert(bucket_idx, key, next_value);
+ }
+
+ // Otherwise, copy the hash map value into next_value.
+ return CopyEntryValue(idx_pair.target_entry_index, next_value);
+}
+
+libtextclassifier3::Status PersistentHashMap::Get(std::string_view key,
+ void* value) const {
+ ICING_RETURN_IF_ERROR(ValidateKey(key));
+ ICING_ASSIGN_OR_RETURN(
+ int32_t bucket_idx,
+ HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
+
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
+ FindEntryIndexByKey(bucket_idx, key));
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found in PersistentHashMap ", working_path_));
+ }
+
+ return CopyEntryValue(idx_pair.target_entry_index, value);
+}
+
+libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
+ SetDirty();
+
+ ICING_RETURN_IF_ERROR(ValidateKey(key));
+ ICING_ASSIGN_OR_RETURN(
+ int32_t bucket_idx,
+ HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
+
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
+ FindEntryIndexByKey(bucket_idx, key));
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found in PersistentHashMap ", working_path_));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Entry>::MutableView mutable_target_entry,
+ entry_storage_->GetMutable(idx_pair.target_entry_index));
+ if (idx_pair.prev_entry_index == Entry::kInvalidIndex) {
+ // If prev_entry_idx is Entry::kInvalidIndex, then target_entry must be the
+ // head element of the entry linked list, and we have to update
+ // bucket->head_entry_index_.
+ //
+ // Before: target_entry (head) -> next_entry -> ...
+ // After: next_entry (head) -> ...
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableView mutable_bucket,
+ bucket_storage_->GetMutable(bucket_idx));
+ if (mutable_bucket.Get().head_entry_index() !=
+ idx_pair.target_entry_index) {
+ return absl_ports::InternalError(
+ "Bucket head entry index is inconsistent with the actual entry linked"
+ "list head. This shouldn't happen");
+ }
+ mutable_bucket.Get().set_head_entry_index(
+ mutable_target_entry.Get().next_entry_index());
+ } else {
+ // Otherwise, connect prev_entry and next_entry, to remove target_entry from
+ // the entry linked list.
+ //
+ // Before: ... -> prev_entry -> target_entry -> next_entry -> ...
+ // After: ... -> prev_entry -> next_entry -> ...
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Entry>::MutableView mutable_prev_entry,
+ entry_storage_->GetMutable(idx_pair.prev_entry_index));
+ mutable_prev_entry.Get().set_next_entry_index(
+ mutable_target_entry.Get().next_entry_index());
+ }
+
+ // Zero out the key value bytes. This is necessary for the iterator to
+ // iterate through kv_storage and handle deleted keys properly.
+ int32_t kv_len = key.length() + 1 + info().value_type_size;
+ ICING_RETURN_IF_ERROR(kv_storage_->Set(
+ mutable_target_entry.Get().key_value_index(), kv_len, '\0'));
+
+ // Invalidate target_entry
+ mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex);
+ mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex);
+
+ ++(info().num_deleted_entries);
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<int64_t> PersistentHashMap::GetDiskUsage() const {
+ ICING_ASSIGN_OR_RETURN(int64_t bucket_storage_disk_usage,
+ bucket_storage_->GetDiskUsage());
+ ICING_ASSIGN_OR_RETURN(int64_t entry_storage_disk_usage,
+ entry_storage_->GetDiskUsage());
+ ICING_ASSIGN_OR_RETURN(int64_t kv_storage_disk_usage,
+ kv_storage_->GetDiskUsage());
+
+ int64_t total = bucket_storage_disk_usage + entry_storage_disk_usage +
+ kv_storage_disk_usage;
+ Filesystem::IncrementByOrSetInvalid(
+ filesystem_.GetDiskUsage(GetMetadataFilePath(working_path_).c_str()),
+ &total);
+
+ if (total < 0 || total == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ "Failed to get disk usage of PersistentHashMap");
+ }
+ return total;
+}
+
+libtextclassifier3::StatusOr<int64_t> PersistentHashMap::GetElementsSize()
+ const {
+ ICING_ASSIGN_OR_RETURN(int64_t bucket_storage_elements_size,
+ bucket_storage_->GetElementsFileSize());
+ ICING_ASSIGN_OR_RETURN(int64_t entry_storage_elements_size,
+ entry_storage_->GetElementsFileSize());
+ ICING_ASSIGN_OR_RETURN(int64_t kv_storage_elements_size,
+ kv_storage_->GetElementsFileSize());
+ return bucket_storage_elements_size + entry_storage_elements_size +
+ kv_storage_elements_size;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ Options&& options) {
+ // PersistentHashMap uses working_path as working directory path.
+ // Create working directory.
+ if (!filesystem.CreateDirectory(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ int32_t max_num_buckets_required =
+ std::max(options.init_num_buckets,
+ CalculateNumBucketsRequired(options.max_num_entries,
+ options.max_load_factor_percent));
+
+ // Initialize bucket_storage
+ int32_t pre_mapping_mmap_size = sizeof(Bucket) * max_num_buckets_required;
+ int32_t max_file_size =
+ pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetBucketStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize entry_storage
+ pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries;
+ max_file_size =
+ pre_mapping_mmap_size + FileBackedVector<Entry>::Header::kHeaderSize;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem, GetEntryStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize kv_storage
+ pre_mapping_mmap_size =
+ options.average_kv_byte_size * options.max_num_entries;
+ max_file_size =
+ pre_mapping_mmap_size + FileBackedVector<char>::Header::kHeaderSize;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetKeyValueStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize buckets.
+ ICING_RETURN_IF_ERROR(bucket_storage->Set(
+ /*idx=*/0, /*len=*/options.init_num_buckets, Bucket()));
+ ICING_RETURN_IF_ERROR(bucket_storage->PersistToDisk());
+
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, std::move(working_path), std::move(options),
+ std::move(metadata_mmapped_file), std::move(bucket_storage),
+ std::move(entry_storage), std::move(kv_storage)));
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_persistent_hash_map->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.value_type_size = new_persistent_hash_map->options_.value_type_size;
+ info_ref.max_load_factor_percent =
+ new_persistent_hash_map->options_.max_load_factor_percent;
+ info_ref.num_deleted_entries = 0;
+ info_ref.num_deleted_key_value_bytes = 0;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_persistent_hash_map->InitializeNewStorage());
+
+ return new_persistent_hash_map;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ Options&& options) {
+ // Initialize metadata file
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
+
+ int32_t max_num_buckets_required = CalculateNumBucketsRequired(
+ options.max_num_entries, options.max_load_factor_percent);
+
+ // Initialize bucket_storage
+ int32_t pre_mapping_mmap_size = sizeof(Bucket) * max_num_buckets_required;
+ int32_t max_file_size =
+ pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetBucketStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize entry_storage
+ pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries;
+ max_file_size =
+ pre_mapping_mmap_size + FileBackedVector<Entry>::Header::kHeaderSize;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem, GetEntryStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize kv_storage
+ pre_mapping_mmap_size =
+ options.average_kv_byte_size * options.max_num_entries;
+ max_file_size =
+ pre_mapping_mmap_size + FileBackedVector<char>::Header::kHeaderSize;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetKeyValueStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Create instance.
+ auto persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, std::move(working_path), std::move(options),
+ std::move(metadata_mmapped_file), std::move(bucket_storage),
+ std::move(entry_storage), std::move(kv_storage)));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(persistent_hash_map->InitializeExistingStorage());
+
+ // Validate other values of info and options.
+ // The current # of entries should not exceed options_.max_num_entries.
+ // We compute max_file_size of the 3 storages from options_.max_num_entries.
+ // Since we don't recycle the space of deleted entries (and key-value
+ // bytes), they still occupy space in the storages. Even if the # of
+ // "active" entries doesn't exceed options_.max_num_entries, a new key-value
+ // pair to be inserted could still exceed max_file_size.
+ // Therefore, we should use entry_storage_->num_elements() instead of the #
+ // of "active" entries
+ // (i.e. entry_storage_->num_elements() - info().num_deleted_entries) for
+ // this check. This prevents the storages from growing extremely large when
+ // there are many Delete() and Put() operations.
+ if (persistent_hash_map->entry_storage_->num_elements() >
+ persistent_hash_map->options_.max_num_entries) {
+ return absl_ports::FailedPreconditionError(
+ "Current # of entries exceeds max num entries");
+ }
+
+ // Magic should be the same.
+ if (persistent_hash_map->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError(
+ "PersistentHashMap header magic mismatch");
+ }
+
+ // Value type size should be consistent.
+ if (persistent_hash_map->options_.value_type_size !=
+ persistent_hash_map->info().value_type_size) {
+ return absl_ports::FailedPreconditionError("Incorrect value type size");
+ }
+
+ // Allow max_load_factor_percent to change.
+ if (persistent_hash_map->options_.max_load_factor_percent !=
+ persistent_hash_map->info().max_load_factor_percent) {
+ ICING_VLOG(2) << "Changing max_load_factor_percent from "
+ << persistent_hash_map->info().max_load_factor_percent
+ << " to "
+ << persistent_hash_map->options_.max_load_factor_percent;
+
+ persistent_hash_map->SetInfoDirty();
+ persistent_hash_map->info().max_load_factor_percent =
+ persistent_hash_map->options_.max_load_factor_percent;
+ ICING_RETURN_IF_ERROR(
+ persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false));
+
+ ICING_RETURN_IF_ERROR(persistent_hash_map->PersistToDisk());
+ }
+
+ return persistent_hash_map;
+}
+
+libtextclassifier3::Status PersistentHashMap::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk());
+ is_storage_dirty_ = false;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PersistentHashMap::PersistMetadataToDisk(
+ bool force) {
+ // We can skip persisting metadata to disk only if both info and storage are
+ // clean.
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
+ is_info_dirty_ = false;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeStoragesChecksum(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ // Compute crcs
+ ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
+ bucket_storage_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
+ entry_storage_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage_->ComputeChecksum());
+
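+ // Note: XOR-combining keeps the combined checksum independent of the
+ // combination order, and a change in any single storage's crc still changes
+ // the combined result.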
+ return Crc32(bucket_storage_crc.Get() ^ entry_storage_crc.Get() ^
+ kv_storage_crc.Get());
+}
+
+libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair>
+PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx,
+ std::string_view key) const {
+ // Iterate through all entries in the bucket, compare keys, and return the
+ // entry index if found.
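+ // Bucket chaining (for reference):
+ //   bucket.head_entry_index -> entry -> entry -> ... -> Entry::kInvalidIndex
+ // where each entry's key_value_index points at its pair in kv_storage.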
+ ICING_ASSIGN_OR_RETURN(const Bucket* bucket,
+ bucket_storage_->Get(bucket_idx));
+
+ int32_t prev_entry_idx = Entry::kInvalidIndex;
+ int32_t curr_entry_idx = bucket->head_entry_index();
+ while (curr_entry_idx != Entry::kInvalidIndex) {
+ ICING_ASSIGN_OR_RETURN(const Entry* entry,
+ entry_storage_->Get(curr_entry_idx));
+ if (entry->key_value_index() == kInvalidKVIndex) {
+ ICING_LOG(ERROR) << "Got an invalid key value index in the persistent "
+ "hash map bucket. This shouldn't happen";
+ return absl_ports::InternalError("Unexpected invalid key value index");
+ }
+ ICING_ASSIGN_OR_RETURN(const char* kv_arr,
+ kv_storage_->Get(entry->key_value_index()));
+ if (key.compare(kv_arr) == 0) {
+ return EntryIndexPair(curr_entry_idx, prev_entry_idx);
+ }
+
+ prev_entry_idx = curr_entry_idx;
+ curr_entry_idx = entry->next_entry_index();
+ }
+
+ return EntryIndexPair(curr_entry_idx, prev_entry_idx);
+}
+
+libtextclassifier3::Status PersistentHashMap::CopyEntryValue(
+ int32_t entry_idx, void* value) const {
+ ICING_ASSIGN_OR_RETURN(const Entry* entry, entry_storage_->Get(entry_idx));
+
+ ICING_ASSIGN_OR_RETURN(const char* kv_arr,
+ kv_storage_->Get(entry->key_value_index()));
+ int32_t value_offset = strlen(kv_arr) + 1;
+ memcpy(value, kv_arr + value_offset, info().value_type_size);
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
+ std::string_view key,
+ const void* value) {
+ SetDirty();
+
+ // If entry_storage_->num_elements() + 1 exceeds options_.max_num_entries,
+ // then return an error.
+ // We compute max_file_size of the 3 storages from options_.max_num_entries.
+ // Since we don't recycle the space of deleted entries (and key-value
+ // bytes), they still occupy space in the storages. Even if the # of
+ // "active" entries (i.e. size()) doesn't exceed options_.max_num_entries, a
+ // new key-value pair to be inserted could still exceed max_file_size.
+ // Therefore, we should use entry_storage_->num_elements() instead of size()
+ // for this check. This prevents the storages from growing extremely large
+ // when there are many Delete() and Put() operations.
+ if (entry_storage_->num_elements() > options_.max_num_entries - 1) {
+ return absl_ports::ResourceExhaustedError("Cannot insert new entry");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableView mutable_bucket,
+ bucket_storage_->GetMutable(bucket_idx));
+
+ // Append new key value.
+ int32_t new_kv_idx = kv_storage_->num_elements();
+ int32_t kv_len = key.size() + 1 + info().value_type_size;
+ int32_t value_offset = key.size() + 1;
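+ // Serialized layout of each key value pair in kv_storage (for reference):
+ //   [key bytes]['\0' terminator][value bytes, info().value_type_size long]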
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<char>::MutableArrayView mutable_new_kv_arr,
+ kv_storage_->Allocate(kv_len));
+ mutable_new_kv_arr.SetArray(/*idx=*/0, key.data(), key.size());
+ mutable_new_kv_arr.SetArray(/*idx=*/key.size(), "\0", 1);
+ mutable_new_kv_arr.SetArray(/*idx=*/value_offset,
+ reinterpret_cast<const char*>(value),
+ info().value_type_size);
+
+ // Append new entry.
+ int32_t new_entry_idx = entry_storage_->num_elements();
+ ICING_RETURN_IF_ERROR(entry_storage_->Append(
+ Entry(new_kv_idx, mutable_bucket.Get().head_entry_index())));
+ mutable_bucket.Get().set_head_entry_index(new_entry_idx);
+
+ return RehashIfNecessary(/*force_rehash=*/false);
+}
+
+libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
+ bool force_rehash) {
+ int32_t new_num_bucket = bucket_storage_->num_elements();
+ while (new_num_bucket <= Bucket::kMaxNumBuckets / 2 &&
+ size() > static_cast<int64_t>(new_num_bucket) *
+ info().max_load_factor_percent / 100) {
+ new_num_bucket *= 2;
+ }
+
+ if (!force_rehash && new_num_bucket == bucket_storage_->num_elements()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ SetDirty();
+
+ // Resize and reset buckets.
+ ICING_RETURN_IF_ERROR(
+ bucket_storage_->Set(0, new_num_bucket, Bucket(Entry::kInvalidIndex)));
+
+ // Iterate all key value pairs in kv_storage, rehash and insert.
+ Iterator iter = GetIterator();
+ int32_t entry_idx = 0;
+ while (iter.Advance()) {
+ ICING_ASSIGN_OR_RETURN(int32_t bucket_idx,
+ HashKeyToBucketIndex(iter.GetKey(), new_num_bucket));
+ ICING_ASSIGN_OR_RETURN(FileBackedVector<Bucket>::MutableView mutable_bucket,
+ bucket_storage_->GetMutable(bucket_idx));
+
+ // Update entry and bucket.
+ ICING_RETURN_IF_ERROR(entry_storage_->Set(
+ entry_idx,
+ Entry(iter.GetIndex(), mutable_bucket.Get().head_entry_index())));
+ mutable_bucket.Get().set_head_entry_index(entry_idx);
+
+ ++entry_idx;
+ }
+
+ // Since there may have been deleted entries, after rehashing the # of
+ // vector elements in entry_storage_ may be greater than the actual # of
+ // entries. Therefore, we have to truncate entry_storage_ to the correct
+ // size.
+ if (entry_idx < entry_storage_->num_elements()) {
+ ICING_RETURN_IF_ERROR(entry_storage_->TruncateTo(entry_idx));
+ }
+
+ info().num_deleted_entries = 0;
+
+ return libtextclassifier3::Status::OK;
+}
+
+bool PersistentHashMap::Iterator::Advance() {
+ // Jump over the current key value pair before advancing to the next valid
+ // key value pair. In the first round (after construction), curr_key_len_
+ // is 0, so don't jump over anything.
+ if (curr_key_len_ != 0) {
+ curr_kv_idx_ += curr_key_len_ + 1 + map_->info().value_type_size;
+ curr_key_len_ = 0;
+ }
+
+ // By skipping null chars, we will be automatically handling deleted entries
+ // (which are zeroed out during deletion).
+ for (const char* curr_kv_ptr = map_->kv_storage_->array() + curr_kv_idx_;
+ curr_kv_idx_ < map_->kv_storage_->num_elements();
+ ++curr_kv_ptr, ++curr_kv_idx_) {
+ if (*curr_kv_ptr != '\0') {
+ curr_key_len_ = strlen(curr_kv_ptr);
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
new file mode 100644
index 0000000..5f7999d
--- /dev/null
+++ b/icing/file/persistent-hash-map.h
@@ -0,0 +1,529 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_PERSISTENT_HASH_MAP_H_
+#define ICING_FILE_PERSISTENT_HASH_MAP_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// Low-level persistent hash map.
+// It supports variable-length serialized keys + fixed-length serialized
+// values. Key and value can be any type, but callers should serialize the
+// key/value themselves and pass the raw bytes into the hash map, and the
+// serialized key must not contain the termination character '\0'.
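+//
+// Example usage (an illustrative sketch; the working path, value type, and
+// surrounding error handling are made up for this comment):
+//
+//   PersistentHashMap::Options options(/*value_type_size_in=*/sizeof(int));
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<PersistentHashMap> hash_map,
+//       PersistentHashMap::Create(filesystem, "/path/to/dir", options));
+//   int value = 1;
+//   ICING_RETURN_IF_ERROR(hash_map->Put("some-key", &value));
+//   int read_value;
+//   ICING_RETURN_IF_ERROR(hash_map->Get("some-key", &read_value));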
+class PersistentHashMap : public PersistentStorage {
+ public:
+ // For iterating through persistent hash map. The order is not guaranteed.
+ //
+ // Not thread-safe.
+ //
+ // Any change to the underlying persistent hash map invalidates the iterator.
+ class Iterator {
+ public:
+ // Advance to the next entry.
+ //
+ // Returns:
+ // True on success, otherwise false.
+ bool Advance();
+
+ int32_t GetIndex() const { return curr_kv_idx_; }
+
+ // Get the key.
+ //
+ // REQUIRES: the preceding call to Advance() returned true.
+ std::string_view GetKey() const {
+ return std::string_view(map_->kv_storage_->array() + curr_kv_idx_,
+ curr_key_len_);
+ }
+
+ // Get the memory mapped address of the value.
+ //
+ // REQUIRES: the preceding call to Advance() returned true.
+ const void* GetValue() const {
+ return static_cast<const void*>(map_->kv_storage_->array() +
+ curr_kv_idx_ + curr_key_len_ + 1);
+ }
+
+ private:
+ explicit Iterator(const PersistentHashMap* map)
+ : map_(map), curr_kv_idx_(0), curr_key_len_(0) {}
+
+ // Does not own
+ const PersistentHashMap* map_;
+
+ int32_t curr_kv_idx_;
+ int32_t curr_key_len_;
+
+ friend class PersistentHashMap;
+ };
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+
+ struct Info {
+ static constexpr int32_t kMagic = 0x653afd7b;
+
+ int32_t magic;
+ int32_t value_type_size;
+ int32_t max_load_factor_percent;
+ int32_t num_deleted_entries;
+ int32_t num_deleted_key_value_bytes;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 20, "");
+
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 32, "");
+
+ // Bucket: stores the index of the head entry of its entry chain.
+ class Bucket {
+ public:
+ // Absolute max # of buckets allowed. Since we're using FileBackedVector to
+ // store buckets, add some static_asserts to ensure numbers here are
+ // compatible with FileBackedVector.
+ static constexpr int32_t kMaxNumBuckets = 1 << 24;
+
+ explicit Bucket(int32_t head_entry_index = Entry::kInvalidIndex)
+ : head_entry_index_(head_entry_index) {}
+
+ // For FileBackedVector
+ bool operator==(const Bucket& other) const {
+ return head_entry_index_ == other.head_entry_index_;
+ }
+
+ int32_t head_entry_index() const { return head_entry_index_; }
+ void set_head_entry_index(int32_t head_entry_index) {
+ head_entry_index_ = head_entry_index;
+ }
+
+ private:
+ int32_t head_entry_index_;
+ } __attribute__((packed));
+ static_assert(sizeof(Bucket) == 4, "");
+ static_assert(sizeof(Bucket) == FileBackedVector<Bucket>::kElementTypeSize,
+ "Bucket type size is inconsistent with FileBackedVector "
+ "element type size");
+ static_assert(Bucket::kMaxNumBuckets <=
+ (FileBackedVector<Bucket>::kMaxFileSize -
+ FileBackedVector<Bucket>::Header::kHeaderSize) /
+ FileBackedVector<Bucket>::kElementTypeSize,
+ "Max # of buckets cannot fit into FileBackedVector");
+
+ // Entry: a node in a bucket's entry chain. It stores the index of its
+ // serialized key value pair in kv_storage and the index of the next entry.
+ class Entry {
+ public:
+ // Absolute max # of entries allowed. Since we're using FileBackedVector to
+ // store entries, add some static_asserts to ensure numbers here are
+ // compatible with FileBackedVector.
+ //
+ // Still, the actual max # of entries is determined by the key-value
+ // storage, since key lengths vary and affect the # of key-value pairs that
+ // can actually be stored.
+ static constexpr int32_t kMaxNumEntries = 1 << 23;
+ static constexpr int32_t kMaxIndex = kMaxNumEntries - 1;
+ static constexpr int32_t kInvalidIndex = -1;
+
+ explicit Entry(int32_t key_value_index, int32_t next_entry_index)
+ : key_value_index_(key_value_index),
+ next_entry_index_(next_entry_index) {}
+
+ bool operator==(const Entry& other) const {
+ return key_value_index_ == other.key_value_index_ &&
+ next_entry_index_ == other.next_entry_index_;
+ }
+
+ int32_t key_value_index() const { return key_value_index_; }
+ void set_key_value_index(int32_t key_value_index) {
+ key_value_index_ = key_value_index;
+ }
+
+ int32_t next_entry_index() const { return next_entry_index_; }
+ void set_next_entry_index(int32_t next_entry_index) {
+ next_entry_index_ = next_entry_index;
+ }
+
+ private:
+ int32_t key_value_index_;
+ int32_t next_entry_index_;
+ } __attribute__((packed));
+ static_assert(sizeof(Entry) == 8, "");
+ static_assert(sizeof(Entry) == FileBackedVector<Entry>::kElementTypeSize,
+ "Entry type size is inconsistent with FileBackedVector "
+ "element type size");
+ static_assert(Entry::kMaxNumEntries <=
+ (FileBackedVector<Entry>::kMaxFileSize -
+ FileBackedVector<Entry>::Header::kHeaderSize) /
+ FileBackedVector<Entry>::kElementTypeSize,
+ "Max # of entries cannot fit into FileBackedVector");
+
+ // Key-value serialized type
+ static constexpr int32_t kMaxKVTotalByteSize = 1 << 28;
+ static constexpr int32_t kMaxKVIndex = kMaxKVTotalByteSize - 1;
+ static constexpr int32_t kInvalidKVIndex = -1;
+ static_assert(sizeof(char) == FileBackedVector<char>::kElementTypeSize,
+ "Char type size is inconsistent with FileBackedVector element "
+ "type size");
+ static_assert(kMaxKVTotalByteSize <=
+ FileBackedVector<char>::kMaxFileSize -
+ FileBackedVector<char>::Header::kHeaderSize,
+ "Max total byte size of key value pairs cannot fit into "
+ "FileBackedVector");
+
+ static constexpr int32_t kMaxValueTypeSize = 1 << 10;
+
+ struct Options {
+ static constexpr int32_t kDefaultMaxLoadFactorPercent = 100;
+ static constexpr int32_t kDefaultAverageKVByteSize = 32;
+ static constexpr int32_t kDefaultInitNumBuckets = 1 << 13;
+
+ explicit Options(
+ int32_t value_type_size_in,
+ int32_t max_num_entries_in = Entry::kMaxNumEntries,
+ int32_t max_load_factor_percent_in = kDefaultMaxLoadFactorPercent,
+ int32_t average_kv_byte_size_in = kDefaultAverageKVByteSize,
+ int32_t init_num_buckets_in = kDefaultInitNumBuckets,
+ bool pre_mapping_fbv_in = false)
+ : value_type_size(value_type_size_in),
+ max_num_entries(max_num_entries_in),
+ max_load_factor_percent(max_load_factor_percent_in),
+ average_kv_byte_size(average_kv_byte_size_in),
+ init_num_buckets(init_num_buckets_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+
+ bool IsValid() const;
+
+ // (fixed) size of the serialized value type for hash map.
+ int32_t value_type_size;
+
+ // Max # of entries, default Entry::kMaxNumEntries.
+ int32_t max_num_entries;
+
+ // Max load factor (as a percentage) for the hash map. If
+ // load_factor_percent exceeds max_load_factor_percent, then a rehash is
+ // invoked (and the # of buckets is doubled).
+ // load_factor_percent = 100 * num_keys / num_buckets
+ //
+ // Note that a load_factor_percent exceeding 100 is considered valid.
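+ // For example (illustrative numbers): with max_load_factor_percent = 100
+ // and 8 buckets, inserting the 9th key pushes the load factor above 100
+ // and doubles the bucket count to 16.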
+ int32_t max_load_factor_percent;
+
+ // Average byte size of a key value pair. It is used to estimate kv_storage_
+ // pre_mapping_mmap_size.
+ int32_t average_kv_byte_size;
+
+ // Initial # of buckets for the persistent hash map. It should be a power
+ // of 2. It is used when creating a new persistent hash map and ignored
+ // when creating the instance from existing files.
+ int32_t init_num_buckets;
+
+ // Flag indicating whether to memory-map the max possible file size for the
+ // underlying FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv;
+ };
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "persistent_hash_map";
+
+ // Creates a new PersistentHashMap to read/write/delete key value pairs.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ //               PersistentHashMap uses the working path as its working
+ //               directory, and all related files will be stored under this
+ //               directory. It takes full ownership of working_path_,
+ //               including creation/deletion. It is the caller's
+ //               responsibility to specify a correct working path and to
+ //               avoid mixing different persistent storages together under
+ //               the same path. Also, the caller owns the parent directory
+ //               of working_path_ and is responsible for its
+ //               creation/deletion. See PersistentStorage for more details
+ //               about the concept of working_path.
+ // options: Options instance.
+ //
+ // Returns:
+ // INVALID_ARGUMENT_ERROR if any value in options is invalid.
+ // FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum or any other inconsistency.
+ // INTERNAL_ERROR on I/O errors.
+ // Any FileBackedVector errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ Options options);
+
+ // Deletes PersistentHashMap under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ std::string working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ ~PersistentHashMap() override;
+
+ // Update a key value pair. If the key does not exist, then insert (key,
+ // value) into the storage. Otherwise, overwrite the existing value in the
+ // storage.
+ //
+ // REQUIRES: the buffer pointed to by value must be of value_size()
+ //
+ // Returns:
+ // OK on success
+ // RESOURCE_EXHAUSTED_ERROR if # of entries reach options_.max_num_entries
+ // INVALID_ARGUMENT_ERROR if the key is invalid (i.e. contains '\0')
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::Status Put(std::string_view key, const void* value);
+
+ // If key does not exist, then insert (key, next_value) into the storage.
+ // Otherwise, copy the hash map value into next_value.
+ //
+ // REQUIRES: the buffer pointed to by next_value must be of value_size()
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT_ERROR if the key is invalid (i.e. contains '\0')
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::Status GetOrPut(std::string_view key, void* next_value);
+
+ // Get the value by key from the storage. If the key exists, then copy the
+ // hash map value into the value buffer. Otherwise, return NOT_FOUND_ERROR.
+ //
+ // REQUIRES: the buffer pointed to by value must be of value_size()
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND_ERROR if the key doesn't exist
+ // INVALID_ARGUMENT_ERROR if the key is invalid (i.e. contains '\0')
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::Status Get(std::string_view key, void* value) const;
+
+ // Delete the key value pair from the storage. If key doesn't exist, then do
+ // nothing and return NOT_FOUND_ERROR.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND_ERROR if the key doesn't exist
+ // INVALID_ARGUMENT_ERROR if the key is invalid (i.e. contains '\0')
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::Status Delete(std::string_view key);
+
+ Iterator GetIterator() const { return Iterator(this); }
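+
+ // Example iteration (an illustrative sketch; iteration order is
+ // unspecified):
+ //
+ //   PersistentHashMap::Iterator itr = hash_map->GetIterator();
+ //   while (itr.Advance()) {
+ //     std::string_view key = itr.GetKey();
+ //     const void* value = itr.GetValue();
+ //   }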
+
+ // Calculates and returns the disk usage (metadata + 3 storages total file
+ // size) in bytes.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+ // Returns the total file size of all the elements held in the persistent
+ // hash map. File size is in bytes. This excludes the size of any internal
+ // metadata, i.e. crcs/info of persistent hash map, file backed vector's
+ // header.
+ //
+ // Returns:
+ // File size on success
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+
+ int32_t size() const {
+ return entry_storage_->num_elements() - info().num_deleted_entries;
+ }
+
+ bool empty() const { return size() == 0; }
+
+ int32_t num_buckets() const { return bucket_storage_->num_elements(); }
+
+ private:
+ struct EntryIndexPair {
+ int32_t target_entry_index;
+ int32_t prev_entry_index;
+
+ explicit EntryIndexPair(int32_t target_entry_index_in,
+ int32_t prev_entry_index_in)
+ : target_entry_index(target_entry_index_in),
+ prev_entry_index(prev_entry_index_in) {}
+ };
+
+ explicit PersistentHashMap(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options, MemoryMappedFile&& metadata_mmapped_file,
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ std::unique_ptr<FileBackedVector<char>> kv_storage)
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ options_(std::move(options)),
+ metadata_mmapped_file_(std::make_unique<MemoryMappedFile>(
+ std::move(metadata_mmapped_file))),
+ bucket_storage_(std::move(bucket_storage)),
+ entry_storage_(std::move(entry_storage)),
+ kv_storage_(std::move(kv_storage)),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ Options&& options);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path, Options&& options);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns the combined checksum of all storages. Checksums of
+ // bucket_storage_, entry_storage_ and kv_storage_ are combined by XOR.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ // Finds the index of the target entry (the one that contains the key) in a
+ // bucket (specified by bucket index). Also returns the previous entry
+ // index, since Delete() needs it to update the linked list and head entry
+ // index.
+ //
+ // Returns:
+ // std::pair<int32_t, int32_t>: target entry index and previous entry index
+ // on success. If not found, then target entry
+ // index will be Entry::kInvalidIndex
+ // INTERNAL_ERROR if any content inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::StatusOr<EntryIndexPair> FindEntryIndexByKey(
+ int32_t bucket_idx, std::string_view key) const;
+
+  // Copies the hash map value of the entry into the value buffer.
+  //
+  // REQUIRES: entry_idx should be valid.
+  // REQUIRES: the buffer pointed to by value must be of size value_size()
+ //
+ // Returns:
+ // OK on success
+ // Any FileBackedVector errors
+ libtextclassifier3::Status CopyEntryValue(int32_t entry_idx,
+ void* value) const;
+
+  // Inserts a new key-value pair into a bucket (specified by the bucket
+  // index). The caller should specify the desired bucket index and make sure
+  // that the key is not present in the hash map before calling.
+ //
+ // Returns:
+ // OK on success
+ // Any FileBackedVector errors
+ libtextclassifier3::Status Insert(int32_t bucket_idx, std::string_view key,
+ const void* value);
+
+  // Rehash function: if force_rehash is true or the hash map's load factor
+  // exceeds max_load_factor_percent, all keys will be rehashed.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
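+  //
+  // Worked example: load factor percent is computed as
+  // 100 * num_entries / num_buckets (see the tests), so 150 entries across
+  // 128 buckets give 100 * 150 / 128 ≈ 117; with max_load_factor_percent =
+  // 100 that exceeds the limit and triggers a rehash.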
+ libtextclassifier3::Status RehashIfNecessary(bool force_rehash);
+
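+  // Metadata accessors: Crcs and Info live at fixed offsets
+  // (kCrcsMetadataFileOffset, kInfoMetadataFileOffset) inside the
+  // memory-mapped metadata file, so the accessors below simply reinterpret
+  // those regions.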
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+  // When storage is dirty, info has to be marked dirty as well, so SetDirty()
+  // is exposed to set both at once.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ Options options_;
+
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
+
+ // Storages
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage_;
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage_;
+ std::unique_ptr<FileBackedVector<char>> kv_storage_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_PERSISTENT_HASH_MAP_H_
diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc
new file mode 100644
index 0000000..5535629
--- /dev/null
+++ b/icing/file/persistent-hash-map_test.cc
@@ -0,0 +1,1577 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/persistent-hash-map.h"
+
+#include <cstring>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+using ::testing::Contains;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::Key;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::Pair;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using Bucket = PersistentHashMap::Bucket;
+using Crcs = PersistentStorage::Crcs;
+using Entry = PersistentHashMap::Entry;
+using Info = PersistentHashMap::Info;
+using Options = PersistentHashMap::Options;
+
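+// An arbitrary offset added to on-disk fields to simulate corruption, plus a
+// deliberately tiny initial bucket count so collisions and rehashing are easy
+// to trigger in tests.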
+static constexpr int32_t kCorruptedValueOffset = 3;
+static constexpr int32_t kTestInitNumBuckets = 1;
+
+class PersistentHashMapTest : public ::testing::TestWithParam<bool> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/persistent_hash_map_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
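+  // Serializes `val` into a byte buffer whose size matches the map's value
+  // type size (sizeof(int) in these tests), suitable for Put()/GetOrPut().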
+ std::vector<char> Serialize(int val) {
+ std::vector<char> ret(sizeof(val));
+ memcpy(ret.data(), &val, sizeof(val));
+ return ret;
+ }
+
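+  // Convenience wrapper around Get() that reads the stored value out as an
+  // int.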
+ libtextclassifier3::StatusOr<int> GetValueByKey(
+ PersistentHashMap* persistent_hash_map, std::string_view key) {
+ int val;
+ ICING_RETURN_IF_ERROR(persistent_hash_map->Get(key, &val));
+ return val;
+ }
+
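+  // Drains `iter` and collects every key-value pair into an unordered_map so
+  // tests can compare against the expected contents.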
+ std::unordered_map<std::string, int> GetAllKeyValuePairs(
+ PersistentHashMap::Iterator&& iter) {
+ std::unordered_map<std::string, int> kvps;
+
+ while (iter.Advance()) {
+ int val;
+ memcpy(&val, iter.GetValue(), sizeof(val));
+ kvps.emplace(iter.GetKey(), val);
+ }
+ return kvps;
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
+TEST_P(PersistentHashMapTest, OptionsInvalidValueTypeSize) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.value_type_size = -1;
+ EXPECT_FALSE(options.IsValid());
+
+ options.value_type_size = 0;
+ EXPECT_FALSE(options.IsValid());
+
+ options.value_type_size = PersistentHashMap::kMaxValueTypeSize + 1;
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest, OptionsInvalidMaxNumEntries) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.max_num_entries = -1;
+ EXPECT_FALSE(options.IsValid());
+
+ options.max_num_entries = 0;
+ EXPECT_FALSE(options.IsValid());
+
+ options.max_num_entries = Entry::kMaxNumEntries + 1;
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.max_load_factor_percent = -1;
+ EXPECT_FALSE(options.IsValid());
+
+ options.max_load_factor_percent = 0;
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.average_kv_byte_size = -1;
+ EXPECT_FALSE(options.IsValid());
+
+ options.average_kv_byte_size = 0;
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest, OptionsInvalidInitNumBuckets) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.init_num_buckets = -1;
+ EXPECT_FALSE(options.IsValid());
+
+ options.init_num_buckets = 0;
+ EXPECT_FALSE(options.IsValid());
+
+ options.init_num_buckets = Bucket::kMaxNumBuckets + 1;
+ EXPECT_FALSE(options.IsValid());
+
+  // not a power of 2
+ options.init_num_buckets = 3;
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.max_num_entries = Entry::kMaxNumEntries;
+ options.max_load_factor_percent = 30;
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest,
+ OptionsEstimatedNumKeyValuePairExceedsStorageMaxSize) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ ASSERT_TRUE(options.IsValid());
+
+ options.max_num_entries = 1 << 20;
+ options.average_kv_byte_size = 1 << 20;
+ ASSERT_THAT(static_cast<int64_t>(options.max_num_entries) *
+ options.average_kv_byte_size,
+ Gt(PersistentHashMap::kMaxKVTotalByteSize));
+ EXPECT_FALSE(options.IsValid());
+}
+
+TEST_P(PersistentHashMapTest, InvalidWorkingPath) {
+ EXPECT_THAT(PersistentHashMap::Create(
+ filesystem_, "/dev/null/persistent_hash_map_test",
+ Options(/*value_type_size_in=*/sizeof(int))),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) {
+ Options invalid_options(/*value_type_size_in=*/-1);
+ invalid_options.pre_mapping_fbv = GetParam();
+ ASSERT_FALSE(invalid_options.IsValid());
+
+ EXPECT_THAT(
+ PersistentHashMap::Create(filesystem_, working_path_, invalid_options),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(PersistentHashMapTest, InitializeNewFiles) {
+ {
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_,
+ std::move(options)));
+ EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ // Check info section
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ PersistentHashMap::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
+ EXPECT_THAT(info.value_type_size, Eq(sizeof(int)));
+ EXPECT_THAT(info.max_load_factor_percent,
+ Eq(Options::kDefaultMaxLoadFactorPercent));
+ EXPECT_THAT(info.num_deleted_entries, Eq(0));
+ EXPECT_THAT(info.num_deleted_key_value_bytes, Eq(0));
+
+ // Check crcs section
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ PersistentHashMap::kCrcsMetadataFileOffset));
+  // # of elements in bucket_storage should be 1, so the combined crc over all
+  // storages should be non-zero.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs.component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs.all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs.component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_P(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) {
+ int custom_init_num_buckets = 128;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/custom_init_num_buckets,
+              /*pre_mapping_fbv_in=*/GetParam())));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(custom_init_num_buckets));
+}
+
+TEST_P(PersistentHashMapTest,
+       InitializeNewFilesWithInitNumBucketsLargerThanNumBucketsRequired) {
+ int init_num_buckets = 65536;
+
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/1,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/init_num_buckets,
+              /*pre_mapping_fbv_in=*/GetParam())));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(init_num_buckets));
+}
+
+TEST_P(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ int original_init_num_buckets = 4;
+ {
+ options.init_num_buckets = original_init_num_buckets;
+ ASSERT_TRUE(options.IsValid());
+
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ EXPECT_THAT(persistent_hash_map->num_buckets(),
+ Eq(original_init_num_buckets));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ // Set new init_num_buckets.
+ options.init_num_buckets = 8;
+ ASSERT_TRUE(options.IsValid());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ // # of buckets should still be the original value.
+ EXPECT_THAT(persistent_hash_map->num_buckets(),
+ Eq(original_init_num_buckets));
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+ // Put some key value pairs.
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
+ // Call Delete() to change PersistentHashMap metadata info
+ // (num_deleted_entries)
+ ICING_ASSERT_OK(persistent_hash_map->Delete("c"));
+
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_P(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map1,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+ // Put some key value pairs.
+ ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map1->Put("b", Serialize(2).data()));
+ ICING_ASSERT_OK(persistent_hash_map1->Put("c", Serialize(3).data()));
+ // Call Delete() to change PersistentHashMap metadata info
+ // (num_deleted_entries)
+ ICING_ASSERT_OK(persistent_hash_map1->Delete("c"));
+
+ ASSERT_THAT(persistent_hash_map1, Pointee(SizeIs(2)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map1.get(), "a"), IsOkAndHolds(1));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map1.get(), "b"), IsOkAndHolds(2));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(persistent_hash_map1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map2,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ EXPECT_THAT(persistent_hash_map2, Pointee(SizeIs(2)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "a"), IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "b"), IsOkAndHolds(2));
+}
+
+TEST_P(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
+ // Call Delete() to change PersistentHashMap metadata info
+ // (num_deleted_entries)
+ ICING_ASSERT_OK(persistent_hash_map->Delete("c"));
+
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+ }
+
+ {
+    // The previous instance went out of scope and was destructed. Although we
+    // didn't call PersistToDisk explicitly, the destructor should invoke it,
+    // so initializing another instance on the same files should succeed, and
+    // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Manually change kMagic and update checksum
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ PersistentHashMap::kCrcsMetadataFileOffset));
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ PersistentHashMap::kInfoMetadataFileOffset));
+
+ // Manually change magic and update checksums.
+ info.magic += kCorruptedValueOffset;
+ crcs.component_crcs.info_crc = info.ComputeChecksum().Get();
+ crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
+ }
+
+ {
+ // Attempt to create the persistent hash map with different magic. This
+ // should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("PersistentHashMap header magic mismatch"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithDifferentValueTypeSizeShouldFail) {
+ {
+ // Create new persistent hash map
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the persistent hash map with different value type size.
+ // This should fail.
+ ASSERT_THAT(sizeof(char), Ne(sizeof(int)));
+
+ Options options(/*value_type_size_in=*/sizeof(char));
+ options.pre_mapping_fbv = GetParam();
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Incorrect value type size"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithMaxNumEntriesSmallerThanSizeShouldFail) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+
+ {
+ // Attempt to create the persistent hash map with max num entries smaller
+ // than the current size. This should fail.
+ options.max_num_entries = 1;
+ ASSERT_TRUE(options.IsValid());
+
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
+
+ // Delete 1 kvp.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("a"));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+
+ {
+ // Attempt to create the persistent hash map with max num entries:
+ // - Not smaller than current # of active kvps.
+ // - Smaller than # of all inserted kvps (regardless of activeness).
+ // This should fail.
+ options.max_num_entries = 1;
+ ASSERT_TRUE(options.IsValid());
+
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
+}
+
+TEST_P(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ PersistentHashMap::kCrcsMetadataFileOffset));
+
+ // Manually corrupt all_crc
+ crcs.all_crc += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the persistent hash map with metadata containing
+ // corrupted all_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid all crc"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ PersistentHashMap::kInfoMetadataFileOffset));
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ info.num_deleted_entries += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
+ {
+ // Attempt to create the persistent hash map with info that doesn't match
+ // its checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid info crc"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedBucketStorage) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Update bucket storage manually.
+ const std::string bucket_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".b");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, bucket_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ bucket_storage->ComputeChecksum());
+ ICING_ASSERT_OK(bucket_storage->Append(Bucket()));
+ ICING_ASSERT_OK(bucket_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ bucket_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the persistent hash map with metadata containing
+ // corrupted bucket_storage_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedEntryStorage) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Update entry storage manually.
+ const std::string entry_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".e");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem_, entry_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, entry_storage->ComputeChecksum());
+ ICING_ASSERT_OK(entry_storage->Append(
+ Entry(/*key_value_index=*/-1, /*next_entry_index=*/-1)));
+ ICING_ASSERT_OK(entry_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, entry_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the persistent hash map with metadata containing
+ // corrupted entry_storage_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedKeyValueStorage) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Update kv storage manually.
+ const std::string kv_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".k");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem_, kv_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, kv_storage->ComputeChecksum());
+ ICING_ASSERT_OK(kv_storage->Append('z'));
+ ICING_ASSERT_OK(kv_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, kv_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the persistent hash map with metadata containing
+ // corrupted kv_storage_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesAllowDifferentMaxLoadFactorPercent) {
+ Options options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+      /*pre_mapping_fbv_in=*/GetParam());
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
+
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Set new max_load_factor_percent.
+ options.max_load_factor_percent = 200;
+ ASSERT_TRUE(options.IsValid());
+ ASSERT_THAT(options.max_load_factor_percent,
+ Ne(Options::kDefaultMaxLoadFactorPercent));
+
+    // Attempt to create the persistent hash map with a different max load
+    // factor percent. This should succeed and metadata should be updated
+    // correctly. Also verify that all entries remain unchanged.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ PersistentHashMap::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.max_load_factor_percent,
+ Eq(options.max_load_factor_percent));
+
+  // Crcs should also be updated correctly. We test this by creating an
+  // instance again and making sure it doesn't hit corrupted crcs/info errors.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+}
+
+TEST_P(PersistentHashMapTest,
+ InitializeExistingFilesWithDifferentMaxLoadFactorPercentShouldRehash) {
+ Options options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+      /*pre_mapping_fbv_in=*/GetParam());
+
+ double prev_loading_percent;
+ int prev_num_buckets;
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
+
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "c"), IsOkAndHolds(3));
+
+ prev_loading_percent = persistent_hash_map->size() * 100.0 /
+ persistent_hash_map->num_buckets();
+ prev_num_buckets = persistent_hash_map->num_buckets();
+ ASSERT_THAT(prev_loading_percent,
+ Not(Gt(Options::kDefaultMaxLoadFactorPercent)));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Set greater max_load_factor_percent.
+ options.max_load_factor_percent = 150;
+ ASSERT_TRUE(options.IsValid());
+ ASSERT_THAT(options.max_load_factor_percent, Gt(prev_loading_percent));
+
+    // Attempt to create the persistent hash map with a max load factor
+    // greater than the previous load. There should be no rehashing, and the
+    // # of buckets should remain the same.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(prev_num_buckets));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Set smaller max_load_factor_percent.
+ options.max_load_factor_percent = 50;
+ ASSERT_TRUE(options.IsValid());
+ ASSERT_THAT(options.max_load_factor_percent, Lt(prev_loading_percent));
+
+    // Attempt to create the persistent hash map with a max load factor
+    // smaller than the previous load. There should be rehashing since the
+    // load exceeds the new limit.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+    // After changing max_load_factor_percent, there should be rehashing and
+    // the new load should not be greater than the new max load factor.
+ EXPECT_THAT(persistent_hash_map->size() * 100.0 /
+ persistent_hash_map->num_buckets(),
+ Not(Gt(options.max_load_factor_percent)));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Ne(prev_num_buckets));
+
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "c"), IsOkAndHolds(3));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+}
+
+TEST_P(PersistentHashMapTest, PutAndGet) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+              /*pre_mapping_fbv_in=*/GetParam())));
+
+ EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty()));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_EXPECT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ICING_EXPECT_OK(
+ persistent_hash_map->Put("default-youtube.com", Serialize(50).data()));
+
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(100));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"),
+ IsOkAndHolds(50));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "key-not-exist"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+}
+
+TEST_P(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+              /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(100));
+
+ ICING_EXPECT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(200).data()));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(200));
+
+ ICING_EXPECT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(300).data()));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(300));
+}
+
+TEST_P(PersistentHashMapTest, ShouldRehash) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+              /*pre_mapping_fbv_in=*/GetParam())));
+
+ int original_num_buckets = persistent_hash_map->num_buckets();
+  // Insert 100 key value pairs. There should be rehashing so that the hash
+  // map's load factor doesn't exceed max_load_factor_percent.
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ ICING_ASSERT_OK(persistent_hash_map->Put(key, &i));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(i + 1)));
+
+ EXPECT_THAT(persistent_hash_map->size() * 100.0 /
+ persistent_hash_map->num_buckets(),
+ Not(Gt(Options::kDefaultMaxLoadFactorPercent)));
+ }
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Ne(original_num_buckets));
+
+ // After rehashing, we should still be able to get all inserted entries.
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), key), IsOkAndHolds(i));
+ }
+}
+
+TEST_P(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+              /*pre_mapping_fbv_in=*/GetParam())));
+
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ int val = 1;
+ EXPECT_THAT(persistent_hash_map->GetOrPut("default-google.com", &val),
+ IsOk());
+ EXPECT_THAT(val, Eq(1));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(1));
+}
+
+TEST_P(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ASSERT_THAT(
+ persistent_hash_map->Put("default-google.com", Serialize(1).data()),
+ IsOk());
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(1));
+
+ int val = 2;
+ EXPECT_THAT(persistent_hash_map->GetOrPut("default-google.com", &val),
+ IsOk());
+ EXPECT_THAT(val, Eq(1));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(1));
+}
+
+TEST_P(PersistentHashMapTest, Delete) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+  // Deleting a non-existent key should return a NOT_FOUND error.
+ EXPECT_THAT(persistent_hash_map->Delete("default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-youtube.com", Serialize(50).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+
+  // Deleting an existing key should succeed.
+ ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com"));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ // The deleted key should not be found.
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  // The other key should remain unchanged and available.
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"),
+ IsOkAndHolds(50));
+
+  // Insert the deleted key back. It should get the new value.
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(200).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(200));
+
+ // Delete again
+ ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com"));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  // The other key should remain unchanged and available.
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"),
+ IsOkAndHolds(50));
+}
+
+TEST_P(PersistentHashMapTest, DeleteMultiple) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ std::unordered_map<std::string, int> existing_keys;
+ std::unordered_set<std::string> deleted_keys;
+ // Insert 100 key value pairs
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ ICING_ASSERT_OK(persistent_hash_map->Put(key, &i));
+ existing_keys[key] = i;
+ }
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(existing_keys.size())));
+
+  // Delete several keys, mirroring each deletion in the std containers so the
+  // results can be verified below.
+ std::vector<int> delete_target_ids{3, 4, 6, 9, 13, 18, 24, 31, 39, 48, 58};
+ for (const int delete_target_id : delete_target_ids) {
+ std::string key = "default-google.com-" + std::to_string(delete_target_id);
+ ASSERT_THAT(existing_keys, Contains(Key(key)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), key),
+ IsOkAndHolds(existing_keys[key]));
+ ICING_EXPECT_OK(persistent_hash_map->Delete(key));
+
+ existing_keys.erase(key);
+ deleted_keys.insert(key);
+ }
+
+ // Deleted keys should not be found.
+ for (const std::string& deleted_key : deleted_keys) {
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), deleted_key),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ }
+ // Other keys should remain unchanged and available
+ for (const auto& [existing_key, existing_value] : existing_keys) {
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), existing_key),
+ IsOkAndHolds(existing_value));
+ }
+ // Verify by iterator as well
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ Eq(existing_keys));
+}
+
+TEST_P(PersistentHashMapTest, DeleteBucketHeadElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/1000,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1));
+
+  // Delete the head element of the bucket. Note that in our implementation,
+  // the last added element will become the head element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2"));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"),
+ IsOkAndHolds(0));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"),
+ IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_P(PersistentHashMapTest, DeleteBucketIntermediateElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/1000,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1));
+
+  // Delete an intermediate element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1"));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"),
+ IsOkAndHolds(0));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"),
+ IsOkAndHolds(2));
+}
+
+TEST_P(PersistentHashMapTest, DeleteBucketTailElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/1000,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1));
+
+  // Delete the tail element of the bucket. Note that in our implementation,
+  // the first added element will become the tail element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0"));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"),
+ IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"),
+ IsOkAndHolds(2));
+}
+
+TEST_P(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/1000,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+
+  // Delete the only element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com"));
+ ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty()));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_P(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/1,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+
+  // Putting a new key should fail.
+ EXPECT_THAT(
+ persistent_hash_map->Put("default-youtube.com", Serialize(50).data()),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+  // Modifying an existing key should succeed.
+ EXPECT_THAT(
+ persistent_hash_map->Put("default-google.com", Serialize(200).data()),
+ IsOk());
+
+ // Put after delete should still fail. See the comment in
+ // PersistentHashMap::Insert for more details.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com"));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(0)));
+ EXPECT_THAT(
+ persistent_hash_map->Put("default-youtube.com", Serialize(50).data()),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_P(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
+ // Create new persistent hash map
+ Options options(/*value_type_size_in=*/sizeof(int));
+ options.pre_mapping_fbv = GetParam();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+
+ const char invalid_key[] = "a\0bc";
+ std::string_view invalid_key_view(invalid_key, 4);
+
+ int val = 1;
+ EXPECT_THAT(persistent_hash_map->Put(invalid_key_view, &val),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(persistent_hash_map->GetOrPut(invalid_key_view, &val),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(persistent_hash_map->Get(invalid_key_view, &val),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(persistent_hash_map->Delete(invalid_key_view),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(PersistentHashMapTest, EmptyHashMapIterator) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
+}
+
+TEST_P(PersistentHashMapTest, Iterator) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ std::unordered_map<std::string, int> kvps;
+ // Insert 100 key value pairs
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ ICING_ASSERT_OK(persistent_hash_map->Put(key, &i));
+ kvps.emplace(key, i);
+ }
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(kvps.size())));
+
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ Eq(kvps));
+}
+
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete the first key value pair.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0"));
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ UnorderedElementsAre(Pair("default-google.com-1", 1),
+ Pair("default-google.com-2", 2)));
+}
+
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete any intermediate key value pair.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1"));
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ UnorderedElementsAre(Pair("default-google.com-0", 0),
+ Pair("default-google.com-2", 2)));
+}
+
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete the last key value pair.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2"));
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ UnorderedElementsAre(Pair("default-google.com-0", 0),
+ Pair("default-google.com-1", 1)));
+}
+
+TEST_P(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, working_path_,
+ Options(
+ /*value_type_size_in=*/sizeof(int),
+ /*max_num_entries_in=*/Entry::kMaxNumEntries,
+ /*max_load_factor_percent_in=*/
+ Options::kDefaultMaxLoadFactorPercent,
+ /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize,
+ /*init_num_buckets_in=*/kTestInitNumBuckets,
+ /*pre_mapping_fbv_in=*/GetParam())));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete all key value pairs.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0"));
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1"));
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2"));
+ ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty()));
+ EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
+}
+
+INSTANTIATE_TEST_SUITE_P(PersistentHashMapTest, PersistentHashMapTest,
+ testing::Values(true, false));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/persistent-storage.cc b/icing/file/persistent-storage.cc
new file mode 100644
index 0000000..9a595ef
--- /dev/null
+++ b/icing/file/persistent-storage.cc
@@ -0,0 +1,55 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/persistent-storage.h"
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/core/icing-string-util.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::Status PersistentStorage::Discard(
+ const Filesystem& filesystem, const std::string& working_path,
+ WorkingPathType working_path_type) {
+ switch (working_path_type) {
+ case WorkingPathType::kSingleFile: {
+ if (!filesystem.DeleteFile(working_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete PersistentStorage file: ", working_path));
+ }
+ return libtextclassifier3::Status::OK;
+ }
+ case WorkingPathType::kDirectory: {
+ if (!filesystem.DeleteDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete PersistentStorage directory: ", working_path));
+ }
+ return libtextclassifier3::Status::OK;
+ }
+ case WorkingPathType::kDummy:
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Unknown working path type %d for PersistentStorage %s",
+ static_cast<int>(working_path_type), working_path.c_str()));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h
new file mode 100644
index 0000000..9cb5e4d
--- /dev/null
+++ b/icing/file/persistent-storage.h
@@ -0,0 +1,369 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_PERSISTENT_STORAGE_H_
+#define ICING_FILE_PERSISTENT_STORAGE_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// PersistentStorage: an abstract class for all persistent data structures.
+// - It provides some common persistent file methods, e.g. PersistToDisk.
+// - It encapsulates most of the checksum handling logic (including updates
+//   and validation).
+//
+// Terminology:
+// - Crcs: checksum section
+// - Info: (custom) information for derived class
+// - Metadata: Crcs + Info
+//
+// Usually a persistent data structure will have its own custom Info and
+// storages (single or composite storages) definition. To create a new
+// persistent data structure via PersistentStorage:
+// - Decide what type the working path is (single file or directory). See
+// working_path_ and WorkingPathType for more details.
+// - Create a new class that inherits PersistentStorage:
+// - Declare custom Info and design the metadata section layout.
+// Usually the layout is <Crcs><Info>, and there are 2 common ways to
+// manage metadata section:
+// - Have a separate file for metadata. In this case, the new persistent
+// data structure contains multiple files, so working path should be used
+// as directory path and multiple files will be stored under it. Example:
+// PersistentHashMap.
+// - Have a single file for both metadata and storage data. In this case,
+// the file layout should be <Crcs><Info><Storage Data>, and
+// working path should be used as file path. Example: FileBackedVector.
+// - Handle working path file/directory creation and deletion.
+//     PersistentStorage only provides the static Discard() method. The
+//     derived class should implement the remaining logic, e.g. working path
+//     (file/directory) creation, and checking the conditions for discarding
+//     the working path and starting over with new file(s).
+// - Implement all pure virtual methods:
+// - PersistStoragesToDisk: persist all (composite) storages. In general,
+// the implementation will be calling PersistToDisk for all composite
+// storages.
+// - PersistMetadataToDisk: persist metadata, including Crcs and Info.
+// - If the derived class maintains a concrete Crc and (custom) Info
+// instance, then it should perform write/pwrite into the metadata
+// section.
+// - If the derived class uses memory-mapped region directly for metadata,
+// then it should call MemoryMappedFile::PersistToDisk.
+// - See crcs() for more details.
+// - ComputeInfoChecksum: compute the checksum for custom Info.
+// - ComputeStoragesChecksum: compute the (combined) checksum for all
+// (composite) storages. In general, the implementation will be calling
+// UpdateChecksums for all composite storages and XOR all checksums.
+// - crcs(): provide the reference for PersistentStorage to write checksums.
+// The derived class can either maintain a concrete Crcs instance, or
+// reinterpret_cast the memory-mapped region to Crcs reference. Either
+// choice is fine as long as PersistMetadataToDisk flushes it to disk
+// correctly.
+// - Call either InitializeNewStorage or InitializeExistingStorage when
+//   creating and initializing an instance, depending on whether the storage
+//   is brand new or restored from existing file(s).
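+//
+// An illustrative sketch of a minimal single-file derived class (MyStorage
+// and MyInfo are hypothetical names, not part of this library):
+//
+//   class MyStorage : public PersistentStorage {
+//    public:
+//     struct MyInfo {
+//       int32_t num_elements;
+//     } __attribute__((packed));
+//
+//    protected:
+//     libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+//     libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+//     libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+//         bool force) override;
+//     libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+//         bool force) override;
+//     Crcs& crcs() override { return crcs_; }
+//     const Crcs& crcs() const override { return crcs_; }
+//
+//    private:
+//     // Concrete instances flushed to disk by PersistMetadataToDisk.
+//     Crcs crcs_;
+//     MyInfo info_;
+//   };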
+class PersistentStorage {
+ public:
+ enum class WorkingPathType {
+ kSingleFile,
+ kDirectory,
+ kDummy,
+ };
+
+ // Crcs and Info will be written into the metadata section. Info is defined by
+ // the actual implementation of each persistent storage. Usually the Metadata
+ // layout is: <Crcs><Info>
+ struct Crcs {
+ struct ComponentCrcs {
+ uint32_t info_crc;
+ uint32_t storages_crc;
+
+ bool operator==(const ComponentCrcs& other) const {
+ return info_crc == other.info_crc && storages_crc == other.storages_crc;
+ }
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(std::string_view(reinterpret_cast<const char*>(this),
+ sizeof(ComponentCrcs)));
+ }
+ } __attribute__((packed));
+
+ bool operator==(const Crcs& other) const {
+ return all_crc == other.all_crc && component_crcs == other.component_crcs;
+ }
+
+ uint32_t all_crc;
+ ComponentCrcs component_crcs;
+ } __attribute__((packed));
+ static_assert(sizeof(Crcs) == 12, "");
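+
+  // With the packed layout above, the 12-byte Crcs section is:
+  //   bytes 0-3:  all_crc
+  //   bytes 4-7:  component_crcs.info_crc
+  //   bytes 8-11: component_crcs.storages_crc
+  // so in a <Crcs><Info> metadata section, Info starts at byte offset 12.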
+
+ // Deletes working_path according to its type.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+  //   - INVALID_ARGUMENT_ERROR if working_path_type is an unknown type
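+  //
+  // Usage sketch (the path here is hypothetical):
+  //   ICING_RETURN_IF_ERROR(PersistentStorage::Discard(
+  //       filesystem, "/path/to/my_storage", WorkingPathType::kDirectory));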
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path,
+ WorkingPathType working_path_type);
+
+ virtual ~PersistentStorage() = default;
+
+ // Initializes new persistent storage. It computes the initial checksums and
+ // writes into the metadata file.
+ //
+  // Note: either InitializeNewStorage or InitializeExistingStorage should be
+  // invoked after creating a PersistentStorage instance and before using it;
+  // otherwise an uninitialized instance will fail when using persistent
+  // storage features, e.g. PersistToDisk, UpdateChecksums.
+ //
+ // Returns:
+ // - OK on success or already initialized
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status InitializeNewStorage() {
+ if (is_initialized_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(/*force=*/true));
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk(/*force=*/true));
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk(/*force=*/true));
+
+ is_initialized_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Initializes persistent storage from existing file(s).
+ //
+ // It enforces the following check(s):
+ // - Validate checksums.
+ //
+ // Note: either InitializeNewStorage or InitializeExistingStorage should be
+  // invoked after creating a PersistentStorage instance and before using it.
+ //
+ // Returns:
+ // - OK on success or already initialized
+ // - FAILED_PRECONDITION_ERROR if checksum validation fails.
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status InitializeExistingStorage() {
+ if (is_initialized_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(ValidateChecksums());
+
+ is_initialized_ = true;
+ return libtextclassifier3::Status::OK;
+ }
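+
+  // A sketch of the typical initialization flow in a derived class's factory
+  // method (names are illustrative):
+  //   auto storage = std::make_unique<MyStorage>(...);
+  //   if (is_new_file) {
+  //     ICING_RETURN_IF_ERROR(storage->InitializeNewStorage());
+  //   } else {
+  //     ICING_RETURN_IF_ERROR(storage->InitializeExistingStorage());
+  //   }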
+
+ // Flushes contents to underlying files.
+  // 1) Updates all checksums with the new data.
+  // 2) Flushes storages.
+  // 3) Flushes metadata.
+ //
+ // Force flag will be passed down to PersistMetadataToDisk,
+ // PersistStoragesToDisk, ComputeInfoChecksum, ComputeStoragesChecksum.
+  // - If force == true, then performs the actual persisting operations and
+  //   recomputes the checksums.
+  // - Otherwise, the derived class can decide for itself whether to skip
+  //   persisting operations or recompute checksums lazily if the storage is
+  //   not dirty.
+ //
+ // Returns:
+ // - OK on success
+ // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
+ // - Any errors from PersistStoragesToDisk, UpdateChecksums,
+ // PersistMetadataToDisk, depending on actual implementation
+ libtextclassifier3::Status PersistToDisk(bool force = false) {
+ if (!is_initialized_) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "PersistentStorage ", working_path_, " not initialized"));
+ }
+
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(force));
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk(force));
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk(force));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Updates checksums of all components and returns the overall crc (all_crc)
+ // of the persistent storage.
+ //
+ // Force flag will be passed down ComputeInfoChecksum,
+ // ComputeStoragesChecksum.
+ // - If force == true, then recomputes the checksum.
+  // - Otherwise, the derived class can decide for itself whether to
+  //   recompute the checksum lazily if the storage is not dirty.
+ //
+ // Returns:
+ // - Overall crc of the persistent storage on success
+ // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksums(bool force = false) {
+ if (!is_initialized_) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "PersistentStorage ", working_path_, " not initialized"));
+ }
+
+ return UpdateChecksumsInternal(force);
+ }
+
+ protected:
+ explicit PersistentStorage(const Filesystem& filesystem,
+ std::string working_path,
+ WorkingPathType working_path_type)
+ : filesystem_(filesystem),
+ working_path_(std::move(working_path)),
+ working_path_type_(working_path_type),
+ is_initialized_(false) {}
+
+ // Flushes contents of metadata. The implementation should flush Crcs and Info
+ // correctly, depending on whether they're using memory-mapped regions or
+ // concrete instances in the derived class.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::Status PersistMetadataToDisk(bool force) = 0;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::Status PersistStoragesToDisk(bool force) = 0;
+
+ // Computes and returns Info checksum.
+ // - If force = true, then recompute the entire checksum.
+  // - Otherwise, the derived class can decide for itself whether to compute
+  //   the checksum lazily if the storage is not dirty.
+ //
+ // This function will be mainly called by UpdateChecksums.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) = 0;
+
+ // Computes and returns all storages checksum. If there are multiple storages,
+ // usually we XOR their checksums together to a single checksum.
+ // - If force = true, then recompute the entire checksum.
+  // - Otherwise, the derived class can decide for itself whether to compute
+  //   the checksum lazily if the storage is not dirty.
+ //
+ // This function will be mainly called by UpdateChecksums.
+ //
+ // Returns:
+ // - Crc of all storages on success
+  //   - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) = 0;
+
+ // Returns the Crcs instance reference. The derived class can either own a
+ // concrete Crcs instance, or reinterpret_cast the memory-mapped region to
+ // Crcs reference. PersistMetadataToDisk should flush it to disk correctly.
+ virtual Crcs& crcs() = 0;
+ virtual const Crcs& crcs() const = 0;
+
+ const Filesystem& filesystem_; // Does not own
+ // Path to the storage. It can be a single file path or a directory path
+ // depending on the implementation of the derived class.
+ //
+  // Note that the derived storage class will take full ownership of
+ // working_path_, including creation/deletion. It is the caller's
+ // responsibility to specify correct working path and avoid mixing different
+  // persistent storages together under the same path. Also, the caller owns
+  // the parent directory of working_path_ and is responsible for its
+  // creation/deletion.
+ std::string working_path_;
+ WorkingPathType working_path_type_;
+
+ bool is_initialized_;
+
+ private:
+ // Updates checksums of all components and returns the overall crc (all_crc)
+ // of the persistent storage. Different from UpdateChecksums, it won't check
+ // if PersistentStorage is initialized or not.
+ //
+ // Returns:
+ // - Overall crc of the persistent storage on success
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal(bool force) {
+ Crcs& crcs_ref = crcs();
+ // Compute and update storages + info checksums.
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(force));
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum(force));
+ if (crcs_ref.component_crcs.info_crc == info_crc.Get() &&
+ crcs_ref.component_crcs.storages_crc == storages_crc.Get()) {
+ // If info and storages crc haven't changed, then we don't have to update
+ // checksums.
+ return Crc32(crcs_ref.all_crc);
+ }
+
+ crcs_ref.component_crcs.info_crc = info_crc.Get();
+ crcs_ref.component_crcs.storages_crc = storages_crc.Get();
+
+ // Finally compute and update overall checksum.
+ crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get();
+ return Crc32(crcs_ref.all_crc);
+ }
+
+ // Validates all checksums of the persistent storage.
+ //
+ // Returns:
+ // - OK on success
+ // - FAILED_PRECONDITION_ERROR if any checksum is incorrect.
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status ValidateChecksums() {
+ const Crcs& crcs_ref = crcs();
+ if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) {
+ return absl_ports::FailedPreconditionError("Invalid all crc");
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(/*force=*/true));
+ if (crcs_ref.component_crcs.info_crc != info_crc.Get()) {
+ return absl_ports::FailedPreconditionError("Invalid info crc");
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc,
+ ComputeStoragesChecksum(/*force=*/true));
+ if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) {
+ return absl_ports::FailedPreconditionError("Invalid storages crc");
+ }
+
+ return libtextclassifier3::Status::OK;
+ }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_PERSISTENT_STORAGE_H_
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
new file mode 100644
index 0000000..a36bd9e
--- /dev/null
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -0,0 +1,1263 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// File-backed log of protos with append-only writes and position-based reads.
+//
+// There should only be one instance of a PortableFileBackedProtoLog of the same
+// file at a time; using multiple instances at the same time may lead to
+// undefined behavior.
+//
+// The entire checksum is computed on initialization to verify the contents are
+// valid. On failure, the log will be truncated to the last verified state when
+// PersistToDisk() was called. If the log cannot successfully restore the last
+// state due to disk corruption or some other inconsistency, then the entire log
+// will be lost.
+//
+// Each proto written to the file has its metadata written just before it.
+// Each entry consists of
+// {
+//   1 byte of kProtoMagic;
+//   3 bytes of the proto size;
+//   n bytes of the proto itself
+// }
+//
+// All metadata is written in a portable format, encoded with htonl before
+// writing to file and decoded with ntohl when reading from file.
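+//
+// For example, an 8-byte proto is stored as the four metadata bytes
+// 0x5C 0x00 0x00 0x08 (kProtoMagic, then the proto size in big-endian),
+// followed by the 8 proto bytes.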
+//
+// Example usage:
+// ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+// PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+// file_path_,
+// options));
+// auto proto_log = create_result.proto_log;
+//
+// Document document;
+// document.set_namespace("com.google.android.example");
+// document.set_uri("www.google.com");
+//
+//  int64_t document_offset = proto_log->WriteProto(document);
+//  Document same_document = proto_log->ReadProto(document_offset);
+// proto_log->PersistToDisk();
+
+#ifndef ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_
+#define ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/gzip_stream.h"
+#include "icing/portable/platform.h"
+#include "icing/portable/zlib.h"
+#include "icing/util/bit-util.h"
+#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+namespace icing {
+namespace lib {
+
+template <typename ProtoT>
+class PortableFileBackedProtoLog {
+ public:
+ struct Options {
+ // Whether to compress each proto before writing to the proto log.
+ bool compress;
+
+ // Byte-size limit for each proto written to the store. This does not
+ // include the bytes needed for the metadata of each proto.
+ //
+ // NOTE: Currently, we only support protos up to 16MiB. We store the proto
+ // size in 3 bytes within the metadata.
+ //
+ // NOTE: This limit is only enforced for future writes. If the store
+ // previously had a higher limit, then reading older entries could return
+ // larger protos.
+ //
+    // NOTE: max_proto_size is the upper limit for input protos into the
+    // ProtoLog. Even if a proto larger than max_proto_size compresses to a
+    // smaller size, ProtoLog will not accept it. Protos whose compressed
+    // size is larger than max_proto_size are also not accepted.
+ const int32_t max_proto_size;
+
+ // Level of compression if enabled, NO_COMPRESSION = 0, BEST_SPEED = 1,
+ // BEST_COMPRESSION = 9
+ const int32_t compression_level;
+
+ // Must specify values for options.
+ Options() = delete;
+ explicit Options(
+ bool compress_in, const int32_t max_proto_size_in = kMaxProtoSize,
+ const int32_t compression_level_in = kDeflateCompressionLevel)
+ : compress(compress_in),
+ max_proto_size(max_proto_size_in),
+ compression_level(compression_level_in) {}
+ };
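+
+  // A usage sketch of the options above (values are illustrative):
+  //   Options options(/*compress_in=*/true,
+  //                   /*max_proto_size_in=*/1 << 20,  // 1 MiB
+  //                   /*compression_level_in=*/3);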
+
+ // Our internal max for protos.
+ //
+ // WARNING: Changing this to a larger number may invalidate our assumption
+  // that the proto size can safely be stored in the last 3 bytes of the proto
+ // header.
+ static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB
+ static_assert(kMaxProtoSize <= 0x00FFFFFF,
+ "kMaxProtoSize doesn't fit in 3 bytes");
+
+ // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
+ static constexpr int kDeflateCompressionLevel = 3;
+
+ // Number of bytes we reserve for the heading at the beginning of the proto
+ // log. We reserve this so the header can grow without running into the
+ // contents of the proto log, triggering an unnecessary migration of the data.
+ static constexpr int kHeaderReservedBytes = 256;
+
+ // Header stored at the beginning of the file before the rest of the log
+ // contents. Stores metadata on the log.
+ class Header {
+ public:
+ static constexpr int32_t kMagic = 0xf4c6f67a;
+
+ // We should go directly from 0 to 2 the next time we have to change the
+ // format.
+ static constexpr int32_t kFileFormatVersion = 0;
+
+ uint32_t CalculateHeaderChecksum() const {
+ Crc32 crc;
+
+ // Get a string_view of all the fields of the Header, excluding the
+ // magic_nbytes_ and header_checksum_nbytes_
+ std::string_view header_str(
+ reinterpret_cast<const char*>(this) +
+ offsetof(Header, header_checksum_nbytes_) +
+ sizeof(header_checksum_nbytes_),
+ sizeof(Header) - sizeof(magic_nbytes_) -
+ sizeof(header_checksum_nbytes_));
+ crc.Append(header_str);
+ return crc.Get();
+ }
+
+ int32_t GetMagic() const { return GNetworkToHostL(magic_nbytes_); }
+
+ void SetMagic(int32_t magic_in) {
+ magic_nbytes_ = GHostToNetworkL(magic_in);
+ }
+
+ int32_t GetFileFormatVersion() const {
+ return GNetworkToHostL(file_format_version_nbytes_);
+ }
+
+ void SetFileFormatVersion(int32_t file_format_version_in) {
+ file_format_version_nbytes_ = GHostToNetworkL(file_format_version_in);
+ }
+
+ int32_t GetMaxProtoSize() const {
+ return GNetworkToHostL(max_proto_size_nbytes_);
+ }
+
+ void SetMaxProtoSize(int32_t max_proto_size_in) {
+ max_proto_size_nbytes_ = GHostToNetworkL(max_proto_size_in);
+ }
+
+ int32_t GetLogChecksum() const {
+ return GNetworkToHostL(log_checksum_nbytes_);
+ }
+
+ void SetLogChecksum(int32_t log_checksum_in) {
+ log_checksum_nbytes_ = GHostToNetworkL(log_checksum_in);
+ }
+
+ int64_t GetRewindOffset() const {
+ return GNetworkToHostLL(rewind_offset_nbytes_);
+ }
+
+ void SetRewindOffset(int64_t rewind_offset_in) {
+ rewind_offset_nbytes_ = GHostToNetworkLL(rewind_offset_in);
+ }
+
+ int32_t GetHeaderChecksum() const {
+ return GNetworkToHostL(header_checksum_nbytes_);
+ }
+
+ void SetHeaderChecksum(int32_t header_checksum_in) {
+ header_checksum_nbytes_ = GHostToNetworkL(header_checksum_in);
+ }
+
+ bool GetCompressFlag() const { return GetFlag(kCompressBit); }
+
+ void SetCompressFlag(bool compress) { SetFlag(kCompressBit, compress); }
+
+ bool GetDirtyFlag() const { return GetFlag(kDirtyBit); }
+
+ void SetDirtyFlag(bool dirty) { SetFlag(kDirtyBit, dirty); }
+
+ private:
+ // The least-significant bit offset at which the compress flag is stored in
+    // 'flags_'. Represents whether the protos in the log are compressed
+ // or not.
+ static constexpr int32_t kCompressBit = 0;
+
+ // The least-significant bit offset at which the dirty flag is stored in
+    // 'flags_'. Represents whether the checksummed portion of the log has been
+ // modified after the last checksum was computed.
+ static constexpr int32_t kDirtyBit = 1;
+
+ bool GetFlag(int offset) const {
+ return bit_util::BitfieldGet(flags_, offset, /*len=*/1);
+ }
+
+ void SetFlag(int offset, bool value) {
+ bit_util::BitfieldSet(value, offset, /*len=*/1, &flags_);
+ }
+
+ // Holds the magic as a quick sanity check against file corruption.
+ //
+ // Field is in network-byte order.
+ int32_t magic_nbytes_ = GHostToNetworkL(kMagic);
+
+ // Must be at the beginning after kMagic. Contains the crc checksum of
+ // the following fields.
+ //
+ // Field is in network-byte order.
+ uint32_t header_checksum_nbytes_ = 0;
+
+ // Last known good offset at which the log and its checksum were updated.
+ // If we crash between writing to the log and updating the checksum, we can
+ // try to rewind the log to this offset and verify the checksum is still
+ // valid instead of throwing away the entire log.
+ //
+ // Field is in network-byte order.
+ int64_t rewind_offset_nbytes_ = GHostToNetworkLL(kHeaderReservedBytes);
+
+ // Version number tracking how we serialize the file to disk. If we change
+ // how/what we write to disk, this version should be updated and this class
+ // should handle a migration.
+ //
+ // Currently at kFileFormatVersion.
+ //
+ // Field is in network-byte order.
+ int32_t file_format_version_nbytes_ = 0;
+
+ // The maximum proto size that can be written to the log.
+ //
+ // Field is in network-byte order.
+ int32_t max_proto_size_nbytes_ = 0;
+
+ // Checksum of the log elements, doesn't include the header fields.
+ //
+ // Field is in network-byte order.
+ uint32_t log_checksum_nbytes_ = 0;
+
+ // Bits are used to hold various flags.
+ // Lowest bit is whether the protos are compressed or not.
+ //
+ // Field is only 1 byte, so is byte-order agnostic.
+ uint8_t flags_ = 0;
+
+ // NOTE: New fields should *almost always* be added to the end here. Since
+ // this class may have already been written to disk, appending fields
+ // increases the chances that changes are backwards-compatible.
+ };
+ static_assert(sizeof(Header) <= kHeaderReservedBytes,
+ "Header has grown past our reserved bytes!");
+
+ struct CreateResult {
+ // A successfully initialized log.
+ std::unique_ptr<PortableFileBackedProtoLog<ProtoT>> proto_log;
+
+ // The data status after initializing from a previous state. Data loss can
+ // happen if the file is corrupted or some previously added data was
+ // unpersisted. This may be used to signal that any derived data off of the
+ // proto log may need to be regenerated.
+ DataLoss data_loss = DataLoss::NONE;
+
+ // Whether the proto log had to recalculate the checksum to check its
+ // integrity. This can be avoided if no changes were made or the log was
+ // able to update its checksum before shutting down. But it may have to
+ // recalculate if it's unclear if we crashed after updating the log, but
+ // before updating our checksum.
+ bool recalculated_checksum = false;
+
+ bool has_data_loss() const {
+ return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE;
+ }
+ };
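+
+  // A sketch of inspecting the result after Create (error handling elided):
+  //   ICING_ASSIGN_OR_RETURN(
+  //       CreateResult create_result,
+  //       PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+  //                                                         file_path,
+  //                                                         options));
+  //   if (create_result.has_data_loss()) {
+  //     // Regenerate any data derived from this log.
+  //   }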
+
+ // Factory method to create, initialize, and return a
+ // PortableFileBackedProtoLog. Will create the file if it doesn't exist.
+ //
+ // If on re-initialization the log detects disk corruption or some previously
+ // added data was unpersisted, the log will rewind to the last-good state. The
+ // log saves these checkpointed "good" states when PersistToDisk() is called
+ // or the log is safely destructed. If the log rewinds successfully to the
+ // last-good state, then the returned CreateResult.data_loss indicates
+ // whether it has a data loss and what kind of data loss it is (partial or
+ // complete) so that any derived data may know that it needs to be updated. If
+ // the log re-initializes successfully without any data loss,
+ // CreateResult.data_loss will be NONE.
+ //
+ // Params:
+ // filesystem: Handles system level calls
+ // file_path: Path of the underlying file. Directory of the file should
+ // already exist
+ // options: Configuration options for the proto log
+ //
+ // Returns:
+ // PortableFileBackedProtoLog::CreateResult on success
+ // INVALID_ARGUMENT on an invalid option
+ // INTERNAL_ERROR on IO error
+ static libtextclassifier3::StatusOr<CreateResult> Create(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options);
+
+ // Not copyable
+ PortableFileBackedProtoLog(const PortableFileBackedProtoLog&) = delete;
+ PortableFileBackedProtoLog& operator=(const PortableFileBackedProtoLog&) =
+ delete;
+
+ // This will update the checksum of the log as well.
+ ~PortableFileBackedProtoLog();
+
+ // Writes the serialized proto to the underlying file. Writes are applied
+ // directly to the underlying file. Users do not need to sync the file after
+ // writing.
+ //
+ // Returns:
+ // Offset of the newly appended proto in file on success
+ // INVALID_ARGUMENT if proto is too large, as decided by
+ // Options.max_proto_size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> WriteProto(const ProtoT& proto);
+
+ // Reads out a proto located at file_offset from the file.
+ //
+ // Returns:
+ // A proto on success
+ // NOT_FOUND if the proto at the given offset has been erased
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const;
+
+ // Erases the data of a proto located at file_offset from the file.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status EraseProto(int64_t file_offset);
+
+ // Calculates and returns the disk usage in bytes. Rounds up to the nearest
+ // block size.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+ // Returns the file size of all the elements held in the log. File size is in
+ // bytes. This excludes the size of any internal metadata of the log, e.g. the
+ // log's header.
+ //
+ // Returns:
+ // File size on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+
+ // An iterator helping to find offsets of all the protos in file.
+ // Example usage:
+ //
+ // while (iterator.Advance().ok()) {
+ // int64_t offset = iterator.GetOffset();
+ // // Do something
+ // }
+ class Iterator {
+ public:
+ Iterator(const Filesystem& filesystem, int fd, int64_t initial_offset);
+
+    // Advances to the position of the next proto, whether it has been
+    // erased or not.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if it reaches the end
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status Advance();
+
+ // Returns the file offset of current proto.
+ int64_t GetOffset();
+
+ private:
+ static constexpr int64_t kInvalidOffset = -1;
+    // Used to read proto metadata.
+    const Filesystem* const filesystem_;
+    // Offset of the first proto.
+    int64_t initial_offset_;
+ int64_t current_offset_;
+ int64_t file_size_;
+ int fd_;
+ };
+
+ // Returns an iterator of current proto log. The caller needs to keep the
+ // proto log unchanged while using the iterator, otherwise unexpected
+ // behaviors could happen.
+ Iterator GetIterator();
+
+ // Persists all changes since initialization or the last call to
+ // PersistToDisk(). Any changes that aren't persisted may be lost if the
+ // system fails to close safely.
+ //
+ // Example use case:
+ //
+ // Document document;
+ // document.set_namespace("com.google.android.example");
+ // document.set_uri("www.google.com");
+ //
+ // {
+ // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+ // file_path,
+ // options));
+ // auto proto_log = std::move(create_result.proto_log);
+ //
+  //   int64_t document_offset = proto_log->WriteProto(document);
+ //
+ // // We lose the document here since it wasn't persisted.
+ // // *SYSTEM CRASH*
+ // }
+ //
+ // {
+ // // Can still successfully create after a crash since the log can
+ // // rewind/truncate to recover into a previously good state
+ // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+ // file_path,
+ // options));
+ // auto proto_log = std::move(create_result.proto_log);
+ //
+ // // Lost the proto since we didn't PersistToDisk before the crash
+  //   proto_log->ReadProto(document_offset);  // OUT_OF_RANGE error
+ //
+  //   int64_t document_offset = proto_log->WriteProto(document);
+ //
+ // // Persisted this time, so we should be ok.
+ // ICING_ASSERT_OK(proto_log->PersistToDisk());
+ // }
+ //
+ // {
+ // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+ // file_path,
+ // options));
+ // auto proto_log = std::move(create_result.proto_log);
+ //
+ // // SUCCESS
+  //   Document same_document = proto_log->ReadProto(document_offset);
+ // }
+ //
+ // NOTE: Since all protos are already written to the file directly, this
+ // just updates the checksum and rewind position. Without these updates,
+ // future initializations will truncate the file and discard unpersisted
+ // changes.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status PersistToDisk();
+
+ // Calculates the checksum of the log contents. Excludes the header content.
+ //
+ // Returns:
+ // Crc of the log content
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ private:
+ // Object can only be instantiated via the ::Create factory.
+ PortableFileBackedProtoLog(const Filesystem* filesystem,
+ const std::string& file_path,
+ std::unique_ptr<Header> header,
+ int32_t compression_level);
+
+ // Initializes a new proto log.
+ //
+ // Returns:
+ // std::unique_ptr<CreateResult> on success
+ // INTERNAL_ERROR on IO error
+ static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options);
+
+ // Verifies that the existing proto log is in a good state. If not in a good
+ // state, then the proto log may be truncated to the last good state and
+ // content will be lost.
+ //
+ // Returns:
+ // std::unique_ptr<CreateResult> on success
+ // INTERNAL_ERROR on IO error or internal inconsistencies in the file
+ // INVALID_ARGUMENT_ERROR if options aren't consistent with previous
+ // instances
+ static libtextclassifier3::StatusOr<CreateResult> InitializeExistingFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options, int64_t file_size);
+
+ // Takes an initial checksum and updates it with the content between `start`
+ // and `end` offsets in the file.
+ //
+ // Returns:
+ // Crc of the content between `start`, inclusive, and `end`, exclusive.
+ // INTERNAL_ERROR on IO error
+ // INVALID_ARGUMENT_ERROR if start and end aren't within the file size
+ static libtextclassifier3::StatusOr<Crc32> ComputeChecksum(
+ const Filesystem* filesystem, const std::string& file_path,
+ Crc32 initial_crc, int64_t start, int64_t end);
+
+ // Reads out the metadata of a proto located at file_offset from the fd.
+ // Metadata will be returned in host byte order endianness.
+ //
+ // Returns:
+ // Proto's metadata on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
+ // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
+ static libtextclassifier3::StatusOr<int32_t> ReadProtoMetadata(
+ const Filesystem* const filesystem, int fd, int64_t file_offset,
+ int64_t file_size);
+
+ // Writes metadata of a proto to the fd. Takes in a host byte order endianness
+ // metadata and converts it into a portable metadata before writing.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any IO errors
+ static libtextclassifier3::Status WriteProtoMetadata(
+ const Filesystem* filesystem, int fd, int32_t host_order_metadata);
+
+ static bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+ }
+
+ // Helper function to get stored proto size from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+ // Helper function to get stored proto magic from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
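+
+  // For example, a metadata word of 0x5C000400 decodes to
+  // GetProtoMagic(metadata) == 0x5C (kProtoMagic) and
+  // GetProtoSize(metadata) == 0x400, i.e. a 1024-byte proto.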
+
+ // Magic number added in front of every proto. Used when reading out protos
+ // as a first check for corruption in each entry in the file. Even if there is
+ // a corruption, the best we can do is roll back to our last recovery point
+ // and throw away un-flushed data. We can discard/reuse this byte if needed so
+ // that we have 4 bytes to store the size of protos, and increase the size of
+ // protos we support.
+ static constexpr uint8_t kProtoMagic = 0x5C;
+
+ // Chunks of the file to mmap at a time, so we don't mmap the entire file.
+ // Only used on 32-bit devices
+ static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
+
+ ScopedFd fd_;
+ const Filesystem* const filesystem_;
+ const std::string file_path_;
+ std::unique_ptr<Header> header_;
+ const int32_t compression_level_;
+};
+
+template <typename ProtoT>
+PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog(
+ const Filesystem* filesystem, const std::string& file_path,
+ std::unique_ptr<Header> header, int32_t compression_level)
+ : filesystem_(filesystem),
+ file_path_(file_path),
+ header_(std::move(header)),
+ compression_level_(compression_level) {
+ fd_.reset(filesystem_->OpenForAppend(file_path.c_str()));
+}
+
+template <typename ProtoT>
+PortableFileBackedProtoLog<ProtoT>::~PortableFileBackedProtoLog() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING) << "Error persisting to disk during destruction of "
+ "PortableFileBackedProtoLog: "
+ << file_path_;
+ }
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<
+ typename PortableFileBackedProtoLog<ProtoT>::CreateResult>
+PortableFileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
+ const std::string& file_path,
+ const Options& options) {
+ if (options.max_proto_size <= 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "options.max_proto_size must be greater than 0, was %d",
+ options.max_proto_size));
+ }
+
+ // Since we store the proto_size in 3 bytes, we can only support protos of up
+ // to 16MiB.
+ if (options.max_proto_size > kMaxProtoSize) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "options.max_proto_size must be under 16MiB, was %d",
+ options.max_proto_size));
+ }
+
+ if (options.compression_level < 0 || options.compression_level > 9) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "options.compression_level must be between 0 and 9 inclusive, was %d",
+ options.compression_level));
+ }
+
+ if (!filesystem->FileExists(file_path.c_str())) {
+ return InitializeNewFile(filesystem, file_path, options);
+ }
+
+ int64_t file_size = filesystem->GetFileSize(file_path.c_str());
+ if (file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Bad file size '", file_path, "'"));
+ }
+
+ if (file_size == 0) {
+ return InitializeNewFile(filesystem, file_path, options);
+ }
+
+ return InitializeExistingFile(filesystem, file_path, options, file_size);
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<
+ typename PortableFileBackedProtoLog<ProtoT>::CreateResult>
+PortableFileBackedProtoLog<ProtoT>::InitializeNewFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options) {
+ // Grow to the minimum reserved bytes for the header.
+ if (!filesystem->Truncate(file_path.c_str(), kHeaderReservedBytes)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to initialize file size: ", file_path));
+ }
+
+ // Create the header
+ std::unique_ptr<Header> header = std::make_unique<Header>();
+ header->SetCompressFlag(options.compress);
+ header->SetMaxProtoSize(options.max_proto_size);
+ header->SetHeaderChecksum(header->CalculateHeaderChecksum());
+
+ if (!filesystem->Write(file_path.c_str(), header.get(), sizeof(Header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to write header for file: ", file_path));
+ }
+
+ CreateResult create_result = {
+ std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
+ new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
+ std::move(header),
+ options.compression_level)),
+ /*data_loss=*/DataLoss::NONE, /*recalculated_checksum=*/false};
+
+ return create_result;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<
+ typename PortableFileBackedProtoLog<ProtoT>::CreateResult>
+PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options, int64_t file_size) {
+ bool header_changed = false;
+ if (file_size < kHeaderReservedBytes) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("File header too short for: ", file_path));
+ }
+
+ std::unique_ptr<Header> header = std::make_unique<Header>();
+ if (!filesystem->PRead(file_path.c_str(), header.get(), sizeof(Header),
+ /*offset=*/0)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to read header for file: ", file_path));
+ }
+
+ // Make sure the header is still valid before we use any of its values. This
+ // is covered by the header_checksum check below, but this is a quick check
+ // that can save us from an extra crc computation.
+ if (header->GetMagic() != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", file_path));
+ }
+
+ if (header->GetHeaderChecksum() != header->CalculateHeaderChecksum()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header checksum for: ", file_path));
+ }
+
+ if (header->GetFileFormatVersion() != Header::kFileFormatVersion) {
+ // If this changes, we might need to handle a migration rather than throwing
+ // an error.
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header file format version: ", file_path));
+ }
+
+ if (header->GetCompressFlag() != options.compress) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Inconsistent compress option, expected %d, actual %d",
+ header->GetCompressFlag(), options.compress));
+ }
+
+ int32_t existing_max_proto_size = header->GetMaxProtoSize();
+ if (existing_max_proto_size > options.max_proto_size) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Max proto size cannot be smaller than previous "
+ "instantiations, previous size %d, wanted size %d",
+ header->GetMaxProtoSize(), options.max_proto_size));
+ } else if (existing_max_proto_size < options.max_proto_size) {
+ // It's fine if our new max size is greater than our previous one. Existing
+ // data is still valid.
+ header->SetMaxProtoSize(options.max_proto_size);
+ header_changed = true;
+ }
+
+ DataLoss data_loss = DataLoss::NONE;
+
+ // If we have any documents in our tail, get rid of them since they're not in
+ // our checksum. Our checksum reflects content up to the rewind offset.
+ if (file_size > header->GetRewindOffset()) {
+ if (!filesystem->Truncate(file_path.c_str(), header->GetRewindOffset())) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Failed to truncate '%s' to size %lld", file_path.data(),
+ static_cast<long long>(header->GetRewindOffset())));
+ }
+ data_loss = DataLoss::PARTIAL;
+ }
+
+ bool recalculated_checksum = false;
+
+ // If our dirty flag is set, that means we might have crashed in the middle of
+ // erasing a proto. This could have happened anywhere between:
+ // A. Set dirty flag to true and update header checksum
+ // B. Erase the proto
+ // C. Set dirty flag to false, update log checksum, update header checksum
+ //
+ // Scenario 1: We went down between A and B. Maybe our dirty flag is a
+ // false alarm and we can keep all our data.
+ //
+ // Scenario 2: We went down between B and C. Our data is compromised and
+ // we need to throw everything out.
+ if (header->GetDirtyFlag()) {
+ // Recompute the log's checksum to detect which scenario we're in.
+ ICING_ASSIGN_OR_RETURN(
+ Crc32 calculated_log_checksum,
+ ComputeChecksum(filesystem, file_path, Crc32(),
+ /*start=*/kHeaderReservedBytes, /*end=*/file_size));
+
+ if (header->GetLogChecksum() != calculated_log_checksum.Get()) {
+ // Still doesn't match, we're in Scenario 2. Throw out all our data now
+ // and initialize as a new instance.
+ ICING_ASSIGN_OR_RETURN(CreateResult create_result,
+ InitializeNewFile(filesystem, file_path, options));
+ create_result.data_loss = DataLoss::COMPLETE;
+ create_result.recalculated_checksum = true;
+ return create_result;
+ }
+ // Otherwise we're good, checksum matches our contents so continue
+ // initializing like normal.
+ recalculated_checksum = true;
+
+ // Update our header.
+ header->SetDirtyFlag(false);
+ header_changed = true;
+ }
+
+ if (header_changed) {
+ header->SetHeaderChecksum(header->CalculateHeaderChecksum());
+
+ if (!filesystem->PWrite(file_path.c_str(), /*offset=*/0, header.get(),
+ sizeof(Header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to update header to: ", file_path));
+ }
+ }
+
+ CreateResult create_result = {
+ std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
+ new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
+ std::move(header),
+ options.compression_level)),
+ data_loss, recalculated_checksum};
+
+ return create_result;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32>
+PortableFileBackedProtoLog<ProtoT>::ComputeChecksum(
+ const Filesystem* filesystem, const std::string& file_path,
+ Crc32 initial_crc, int64_t start, int64_t end) {
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem, file_path,
+ MemoryMappedFile::Strategy::READ_ONLY));
+ Crc32 new_crc(initial_crc.Get());
+
+ if (start < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Starting checksum offset of file '%s' must be non-negative, was "
+ "%lld",
+ file_path.c_str(), static_cast<long long>(start)));
+ }
+
+ if (end < start) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Ending checksum offset of file '%s' must not be less than start "
+ "'%lld', was '%lld'",
+ file_path.c_str(), static_cast<long long>(start),
+ static_cast<long long>(end)));
+ }
+
+ int64_t file_size = filesystem->GetFileSize(file_path.c_str());
+ if (end > file_size) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Ending checksum offset of file '%s' must be within "
+ "file size of %lld, was %lld",
+ file_path.c_str(), static_cast<long long>(file_size),
+ static_cast<long long>(end)));
+ }
+
+ Architecture architecture = GetArchitecture();
+ switch (architecture) {
+ case Architecture::BIT_64: {
+ // Don't mmap in chunks here since mmapping can be harmful on 64-bit
+ // devices where mmap/munmap calls need the mmap write semaphore, which
+ // blocks mmap/munmap/mprotect and all page faults from executing while
+ // they run. On 64-bit devices, this doesn't actually load into memory, it
+ // just makes the file faultable. So the whole file should be ok.
+ // b/185822878.
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start));
+ auto mmap_str = std::string_view(mmapped_file.region(), end - start);
+ new_crc.Append(mmap_str);
+ break;
+ }
+ case Architecture::BIT_32:
+ [[fallthrough]];
+ case Architecture::UNKNOWN: {
+ // 32-bit devices only have 4GB of RAM. Mmap in chunks to not use up too
+ // much memory at once. If we're unknown, then also chunk it because we're
+ // not sure what the device can handle.
+      for (int64_t i = start; i < end; i += kMmapChunkSize) {
+ // Don't read past the file size.
+ int next_chunk_size = kMmapChunkSize;
+ if ((i + kMmapChunkSize) >= end) {
+ next_chunk_size = end - i;
+ }
+
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
+
+ auto mmap_str =
+ std::string_view(mmapped_file.region(), next_chunk_size);
+ new_crc.Append(mmap_str);
+ }
+ break;
+ }
+ }
+
+ return new_crc;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) {
+ int64_t proto_size = proto.ByteSizeLong();
+ int32_t host_order_metadata;
+ int64_t current_position = filesystem_->GetCurrentPosition(fd_.get());
+
+ if (proto_size > header_->GetMaxProtoSize()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "proto_size, %lld, was too large to write. Max is %d",
+ static_cast<long long>(proto_size), header_->GetMaxProtoSize()));
+ }
+
+  // At this point, we've guaranteed that proto_size is under kMaxProtoSize
+  // (see ::Create), so we can safely store it in an int.
+ int final_size = 0;
+
+ std::string proto_str;
+ google::protobuf::io::StringOutputStream proto_stream(&proto_str);
+
+ if (header_->GetCompressFlag()) {
+ protobuf_ports::GzipOutputStream::Options options;
+ options.format = protobuf_ports::GzipOutputStream::ZLIB;
+ options.compression_level = compression_level_;
+
+ protobuf_ports::GzipOutputStream compressing_stream(&proto_stream, options);
+
+ bool success = proto.SerializeToZeroCopyStream(&compressing_stream) &&
+ compressing_stream.Close();
+
+ if (!success) {
+ return absl_ports::InternalError("Error compressing proto.");
+ }
+
+ final_size = proto_str.size();
+
+    // If compression made the proto larger than max_proto_size, we can't
+    // write it either.
+ if (final_size > header_->GetMaxProtoSize()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Compressed proto size, %d, was greater than "
+ "max_proto_size, %d",
+ final_size, header_->GetMaxProtoSize()));
+ }
+ } else {
+    // Serialize the proto directly into the string buffer, uncompressed.
+ proto.SerializeToZeroCopyStream(&proto_stream);
+ final_size = proto_str.size();
+ }
+
+ // 1st byte for magic, next 3 bytes for proto size.
+ host_order_metadata = (kProtoMagic << 24) | final_size;
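+  // Illustrative sketch (magic value hypothetical): if kProtoMagic were 0x5C
+  // and final_size were 0x000102, the packed word would be 0x5C000102;
+  // GetProtoMagic() recovers the top byte and GetProtoSize() the low 24 bits.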
+
+  // Actually write the metadata; this has to be done after we know the
+  // possibly-compressed proto size.
+ ICING_RETURN_IF_ERROR(
+ WriteProtoMetadata(filesystem_, fd_.get(), host_order_metadata));
+
+ // Write the serialized proto
+ if (!filesystem_->Write(fd_.get(), proto_str.data(), proto_str.size())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to write proto to: ", file_path_));
+ }
+
+ return current_position;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<ProtoT>
+PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const {
+  int64_t file_size = filesystem_->GetFileSize(fd_.get());
+  if (file_size == Filesystem::kBadFileSize) {
+    return absl_ports::OutOfRangeError("Unable to correctly read size.");
+  }
+
+  // Read out the metadata.
+ ICING_ASSIGN_OR_RETURN(
+ int32_t metadata,
+ ReadProtoMetadata(filesystem_, fd_.get(), file_offset, file_size));
+
+ // Copy out however many bytes it says the proto is
+ int stored_size = GetProtoSize(metadata);
+ file_offset += sizeof(metadata);
+
+ // Read the compressed proto out.
+ if (file_offset + stored_size > file_size) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
+ "out of range of the file size, %lld",
+ static_cast<long long>(file_offset),
+ static_cast<long long>(file_size - 1)));
+ }
+ auto buf = std::make_unique<char[]>(stored_size);
+ if (!filesystem_->PRead(fd_.get(), buf.get(), stored_size, file_offset)) {
+    return absl_ports::InternalError(
+        "Failed to read proto from the proto log.");
+ }
+
+ if (IsEmptyBuffer(buf.get(), stored_size)) {
+ return absl_ports::NotFoundError("The proto data has been erased.");
+ }
+
+ google::protobuf::io::ArrayInputStream proto_stream(buf.get(), stored_size);
+
+ // Deserialize proto
+ ProtoT proto;
+ if (header_->GetCompressFlag()) {
+ protobuf_ports::GzipInputStream decompress_stream(&proto_stream);
+ proto.ParseFromZeroCopyStream(&decompress_stream);
+ } else {
+ proto.ParseFromZeroCopyStream(&proto_stream);
+ }
+
+ return proto;
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto(
+ int64_t file_offset) {
+ int64_t file_size = filesystem_->GetFileSize(fd_.get());
+ if (file_size == Filesystem::kBadFileSize) {
+ return absl_ports::OutOfRangeError("Unable to correctly read size.");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ int32_t metadata,
+ ReadProtoMetadata(filesystem_, fd_.get(), file_offset, file_size));
+ // Copy out however many bytes it says the proto is
+ int stored_size = GetProtoSize(metadata);
+ file_offset += sizeof(metadata);
+ if (file_offset + stored_size > file_size) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
+ "out of range of the file size, %lld",
+ static_cast<long long>(file_offset),
+ static_cast<long long>(file_size - 1)));
+ }
+ auto buf = std::make_unique<char[]>(stored_size);
+
+ // We need to update the crc checksum if the erased area is before the
+ // rewind position.
+ int32_t new_crc;
+ if (file_offset < header_->GetRewindOffset()) {
+ // Set to "dirty" before we start writing anything.
+ header_->SetDirtyFlag(true);
+ header_->SetHeaderChecksum(header_->CalculateHeaderChecksum());
+ if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+ sizeof(Header))) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to update dirty bit of header to: ", file_path_));
+ }
+
+    // We need the xor of the original bytes and the zeros that will replace
+    // them. Since x xor 0 = x, that xor is just the original bytes
+    // themselves.
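+    // Illustrative sketch (byte values hypothetical): erasing {0x12, 0x34}
+    // rewrites the region to {0x00, 0x00}; old xor new is {0x12, 0x34},
+    // which is what UpdateWithXor below folds into the running log checksum
+    // at this position.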
+ // Read the compressed proto out.
+ if (!filesystem_->PRead(fd_.get(), buf.get(), stored_size, file_offset)) {
+      return absl_ports::InternalError(
+          "Failed to read the proto to be erased from the proto log.");
+ }
+ const std::string_view xored_str(buf.get(), stored_size);
+
+ Crc32 crc(header_->GetLogChecksum());
+ ICING_ASSIGN_OR_RETURN(
+ new_crc,
+ crc.UpdateWithXor(xored_str,
+ /*full_data_size=*/header_->GetRewindOffset() -
+ kHeaderReservedBytes,
+ /*position=*/file_offset - kHeaderReservedBytes));
+ }
+
+ // Clear the region.
+ memset(buf.get(), '\0', stored_size);
+ if (!filesystem_->PWrite(fd_.get(), file_offset, buf.get(), stored_size)) {
+    return absl_ports::InternalError(
+        "Failed to erase proto in the proto log.");
+ }
+
+ // If we cleared something in our checksummed area, we should update our
+ // checksum and reset our dirty bit.
+ if (file_offset < header_->GetRewindOffset()) {
+ header_->SetDirtyFlag(false);
+ header_->SetLogChecksum(new_crc);
+ header_->SetHeaderChecksum(header_->CalculateHeaderChecksum());
+
+ if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+ sizeof(Header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to update header to: ", file_path_));
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+PortableFileBackedProtoLog<ProtoT>::GetDiskUsage() const {
+ int64_t size = filesystem_->GetDiskUsage(file_path_.c_str());
+ if (size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError("Failed to get disk usage of proto log");
+ }
+ return size;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+PortableFileBackedProtoLog<ProtoT>::GetElementsFileSize() const {
+ int64_t total_file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (total_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+        "Failed to get file size of elements in the proto log");
+ }
+ return total_file_size - kHeaderReservedBytes;
+}
+
+template <typename ProtoT>
+PortableFileBackedProtoLog<ProtoT>::Iterator::Iterator(
+ const Filesystem& filesystem, int fd, int64_t initial_offset)
+ : filesystem_(&filesystem),
+ initial_offset_(initial_offset),
+ current_offset_(kInvalidOffset),
+ fd_(fd) {
+ file_size_ = filesystem_->GetFileSize(fd_);
+ if (file_size_ == Filesystem::kBadFileSize) {
+ // Fails all Advance() calls
+ file_size_ = 0;
+ }
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status
+PortableFileBackedProtoLog<ProtoT>::Iterator::Advance() {
+ if (current_offset_ == kInvalidOffset) {
+ // First Advance() call
+ current_offset_ = initial_offset_;
+ } else {
+ // Jumps to the next proto position
+ ICING_ASSIGN_OR_RETURN(
+ int32_t metadata,
+ ReadProtoMetadata(filesystem_, fd_, current_offset_, file_size_));
+ current_offset_ += sizeof(metadata) + GetProtoSize(metadata);
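+    // E.g. a 10-byte proto entry at offset 100 advances the iterator to
+    // 100 + 4 + 10 = 114, the metadata word being 4 bytes.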
+ }
+
+ if (current_offset_ < file_size_) {
+ return libtextclassifier3::Status::OK;
+ } else {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "The next proto offset, %lld, is out of file range [0, %lld)",
+ static_cast<long long>(current_offset_),
+ static_cast<long long>(file_size_)));
+ }
+}
+
+template <typename ProtoT>
+int64_t PortableFileBackedProtoLog<ProtoT>::Iterator::GetOffset() {
+ return current_offset_;
+}
+
+template <typename ProtoT>
+typename PortableFileBackedProtoLog<ProtoT>::Iterator
+PortableFileBackedProtoLog<ProtoT>::GetIterator() {
+ return Iterator(*filesystem_, fd_.get(),
+ /*initial_offset=*/kHeaderReservedBytes);
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int32_t>
+PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata(
+ const Filesystem* const filesystem, int fd, int64_t file_offset,
+ int64_t file_size) {
+ // Checks file_offset
+ if (file_offset >= file_size) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "offset, %lld, is out of file range [0, %lld)",
+ static_cast<long long>(file_offset),
+ static_cast<long long>(file_size)));
+ }
+ int32_t portable_metadata;
+ int metadata_size = sizeof(portable_metadata);
+ if (file_offset + metadata_size >= file_size) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Wrong metadata offset %lld, metadata doesn't fit in "
+ "with file range [0, %lld)",
+ static_cast<long long>(file_offset),
+ static_cast<long long>(file_size)));
+ }
+
+ if (!filesystem->PRead(fd, &portable_metadata, metadata_size, file_offset)) {
+    return absl_ports::InternalError("Failed to read proto metadata.");
+ }
+
+ // Need to switch it back to host order endianness after reading from disk.
+ int32_t host_order_metadata = GNetworkToHostL(portable_metadata);
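+  // Illustrative sketch (byte values hypothetical): the big-endian on-disk
+  // bytes {0x5C, 0x00, 0x01, 0x02} decode to 0x5C000102 on every host once
+  // GNetworkToHostL swaps them into host order.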
+
+ // Checks magic number
+ uint8_t stored_k_proto_magic = GetProtoMagic(host_order_metadata);
+ if (stored_k_proto_magic != kProtoMagic) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic,
+ stored_k_proto_magic));
+ }
+
+ return host_order_metadata;
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status
+PortableFileBackedProtoLog<ProtoT>::WriteProtoMetadata(
+ const Filesystem* filesystem, int fd, int32_t host_order_metadata) {
+ // Convert it into portable endian format before writing to disk
+ int32_t portable_metadata = GHostToNetworkL(host_order_metadata);
+ int portable_metadata_size = sizeof(portable_metadata);
+
+ // Write metadata
+ if (!filesystem->Write(fd, &portable_metadata, portable_metadata_size)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to write proto metadata."));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() {
+ int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (file_size == header_->GetRewindOffset()) {
+ // No new protos appended, don't need to update the checksum.
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum());
+
+ header_->SetLogChecksum(crc.Get());
+ header_->SetRewindOffset(file_size);
+ header_->SetHeaderChecksum(header_->CalculateHeaderChecksum());
+
+ if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+ sizeof(Header)) ||
+ !filesystem_->DataSync(fd_.get())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to update header to: ", file_path_));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32>
+PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() {
+ int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+ int64_t new_content_size = file_size - header_->GetRewindOffset();
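+  // E.g. with a rewind offset of 1000 and a file size of 1400 (values
+  // illustrative), only the 400 bytes of tail content are checksummed and
+  // folded into the cached log checksum below.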
+ Crc32 crc;
+ if (new_content_size == 0) {
+ // No new protos appended, return cached checksum
+ return Crc32(header_->GetLogChecksum());
+ } else if (new_content_size < 0) {
+ // File shrunk, recalculate the entire checksum.
+ ICING_ASSIGN_OR_RETURN(
+ crc,
+ ComputeChecksum(filesystem_, file_path_, Crc32(),
+ /*start=*/kHeaderReservedBytes, /*end=*/file_size));
+ } else {
+ // Append new changes to the existing checksum.
+ ICING_ASSIGN_OR_RETURN(
+ crc, ComputeChecksum(
+ filesystem_, file_path_, Crc32(header_->GetLogChecksum()),
+ /*start=*/header_->GetRewindOffset(), /*end=*/file_size));
+ }
+ return crc;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
new file mode 100644
index 0000000..d7ea4bb
--- /dev/null
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -0,0 +1,343 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <random>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/tmp-directory.h"
+
+// go/microbenchmarks
+//
+// To build and run on a local machine:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// icing/file:portable-file-backed-proto-log_benchmark
+//
+// $ blaze-bin/icing/file/portable-file-backed-proto-log_benchmark
+// --benchmark_filter=all
+//
+//
+// To build and run on an Android device (must be connected and rooted):
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// icing/file:portable-file-backed-proto-log_benchmark
+//
+// $ adb root
+//
+// $ adb push
+// blaze-bin/icing/file/portable-file-backed-proto-log_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/portable-file-backed-proto-log-benchmark
+// --benchmark_filter=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+void BM_Write(benchmark::State& state) {
+ const Filesystem filesystem;
+ int string_length = state.range(0);
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->WriteProto(document));
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+ string_length);
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Write)
+ ->Arg(1)
+ ->Arg(32)
+ ->Arg(512)
+ ->Arg(1024)
+ ->Arg(4 * 1024)
+ ->Arg(8 * 1024)
+ ->Arg(16 * 1024)
+ ->Arg(32 * 1024)
+ ->Arg(256 * 1024)
+ ->Arg(2 * 1024 * 1024)
+ ->Arg(8 * 1024 * 1024)
+ ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is
+ // 16MiB, and we need some extra space for the
+ // rest of the document properties
+
+void BM_Read(benchmark::State& state) {
+ const Filesystem filesystem;
+ int string_length = state.range(0);
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+ proto_log->WriteProto(document));
+
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->ReadProto(write_offset));
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+ string_length);
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Read)
+ ->Arg(1)
+ ->Arg(32)
+ ->Arg(512)
+ ->Arg(1024)
+ ->Arg(4 * 1024)
+ ->Arg(8 * 1024)
+ ->Arg(16 * 1024)
+ ->Arg(32 * 1024)
+ ->Arg(256 * 1024)
+ ->Arg(2 * 1024 * 1024)
+ ->Arg(8 * 1024 * 1024)
+ ->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is
+ // 16MiB, and we need some extra space for the
+ // rest of the document properties
+
+void BM_Erase(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s", GetTestTempDir().c_str(), "/proto.log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+ proto_log->WriteProto(document));
+ state.ResumeTiming();
+
+ testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
+
+void BM_ComputeChecksum(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = GetTestTempDir() + "/proto.log";
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ // Make each document 1KiB
+ int string_length = 1024;
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ int num_docs = state.range(0);
+ for (int i = 0; i < num_docs; ++i) {
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+ }
+
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->ComputeChecksum());
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20);
+
+void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = GetTestTempDir() + "/proto.log";
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ // Make the document 1KiB
+ int string_length = 1024;
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ // Write some content and persist. This should update our cached checksum to
+ // include the document.
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // This ComputeChecksum call shouldn't need to do any computation since we can
+ // reuse our cached checksum.
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->ComputeChecksum());
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksumWithCachedChecksum);
+
+void BM_ComputeChecksumOnlyForTail(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = GetTestTempDir() + "/proto.log";
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ // Make the document 1KiB
+ int string_length = 1024;
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ // Write some content and persist. This should update our cached checksum to
+ // include the document.
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // Write another proto into the tail, but it's not included in our cached
+ // checksum since we didn't call persist.
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+
+ // ComputeChecksum should be calculating the checksum of the tail and adding
+ // it to the cached checksum we have.
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->ComputeChecksum());
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksumOnlyForTail);
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
new file mode 100644
index 0000000..cc70151
--- /dev/null
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -0,0 +1,1265 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/portable-file-backed-proto-log.h"
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::A;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::Not;
+using ::testing::NotNull;
+using ::testing::Pair;
+using ::testing::Return;
+
+using Header = PortableFileBackedProtoLog<DocumentProto>::Header;
+
+Header ReadHeader(const Filesystem& filesystem,
+                  const std::string& file_path) {
+ Header header;
+ filesystem.PRead(file_path.c_str(), &header, sizeof(Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteHeader(const Filesystem& filesystem, const std::string& file_path,
+ Header& header) {
+ filesystem.Write(file_path.c_str(), &header, sizeof(Header));
+}
+
+class PortableFileBackedProtoLogTest : public ::testing::Test {
+ protected:
+  // Adds a user-defined default constructor because a const member variable
+  // may cause the compiler to implicitly delete the default constructor.
+  // https://stackoverflow.com/a/47368753
+ PortableFileBackedProtoLogTest() {}
+
+ void SetUp() override {
+ file_path_ = GetTestTempDir() + "/proto_log";
+ filesystem_.DeleteFile(file_path_.c_str());
+ }
+
+ void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
+
+ const Filesystem filesystem_;
+ std::string file_path_;
+ bool compress_ = true;
+ int32_t compression_level_ =
+ PortableFileBackedProtoLog<DocumentProto>::kDeflateCompressionLevel;
+ int64_t max_proto_size_ = 256 * 1024; // 256 KiB
+};
+
+TEST_F(PortableFileBackedProtoLogTest, Initialize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ EXPECT_THAT(create_result.proto_log, NotNull());
+ EXPECT_FALSE(create_result.has_data_loss());
+ EXPECT_FALSE(create_result.recalculated_checksum);
+
+ // Can't recreate the same file with different options.
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ !compress_, max_proto_size_, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, InitializeValidatesOptions) {
+ // max_proto_size must be greater than 0
+ int invalid_max_proto_size = 0;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // max_proto_size must be under 16 MiB
+ invalid_max_proto_size = 16 * 1024 * 1024;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // compression_level must be between 0 and 9 inclusive
+ int invalid_compression_level = -1;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, invalid_compression_level)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // compression_level must be between 0 and 9 inclusive
+ invalid_compression_level = 10;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, invalid_compression_level)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+
+  // With no protos written yet, the log should be exactly the size of the
+  // reserved header space.
+ ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
+ PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes);
+}
+
+TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
+ int max_proto_size = 1;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  // The proto is too large for the configured max_proto_size.
+ ASSERT_THAT(proto_log->WriteProto(document),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write a proto
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset,
+ proto_log->WriteProto(document));
+
+  // 4 bytes of metadata that don't contain the kProtoMagic specified in
+  // portable-file-backed-proto-log.h.
+ uint32_t wrong_magic = 0x7E000000;
+
+ // Sanity check that we opened the file correctly
+ int fd = filesystem_.OpenForWrite(file_path_.c_str());
+ ASSERT_GT(fd, 0);
+
+  // Write the wrong kProtoMagic in; the magic is stored at the beginning of
+  // each proto entry.
+ filesystem_.PWrite(fd, file_offset, &wrong_magic, sizeof(wrong_magic));
+
+ ASSERT_THAT(proto_log->ReadProto(file_offset),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
+ int last_offset;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/false, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(int written_position,
+ proto_log->WriteProto(document1));
+
+ int document1_offset = written_position;
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(written_position,
+ proto_log->WriteProto(document2));
+
+ int document2_offset = written_position;
+ last_offset = written_position;
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ {
+ // Make a new proto_log with the same file_path, and make sure we
+ // can still write to the same underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/false, max_proto_size_, compression_level_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write a third proto
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ ASSERT_THAT(recreated_proto_log->WriteProto(document3),
+ IsOkAndHolds(Gt(last_offset)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
+ int last_offset;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(int written_position,
+ proto_log->WriteProto(document1));
+
+ int document1_offset = written_position;
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(written_position,
+ proto_log->WriteProto(document2));
+
+ int document2_offset = written_position;
+ last_offset = written_position;
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ {
+ // Make a new proto_log with the same file_path, and make sure we
+ // can still write to the same underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_, compression_level_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write a third proto
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ ASSERT_THAT(recreated_proto_log->WriteProto(document3),
+ IsOkAndHolds(Gt(last_offset)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteDifferentCompressionLevel) {
+ int document1_offset;
+ int document2_offset;
+ int document3_offset;
+
+  // The first proto to write is close to the max size. Leave some room for
+  // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace2", "uri2").Build();
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/3)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
+ proto_log->WriteProto(document1));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ // Make a new proto_log with the same file_path but different compression
+ // level, and make sure we can still read from and write to the same
+ // underlying file.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/9)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Check the first proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ recreated_proto_log->WriteProto(document2));
+
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(recreated_proto_log->PersistToDisk());
+ }
+
+ // One more time but with 0 compression level
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level=*/0)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Check the first proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Check the second proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // Write a third proto
+ ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
+ recreated_proto_log->WriteProto(document3));
+
+ ASSERT_GT(document3_offset, document2_offset);
+
+ // Check the third proto
+ ASSERT_THAT(recreated_proto_log->ReadProto(document3_offset),
+ IsOkAndHolds(EqualsProto(document3)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ WriteDifferentCompressionLevelDifferentSizes) {
+ int document_log_size_with_compression_3;
+ int document_log_size_with_no_compression;
+
+  // The first proto to write is close to the max size. Leave some room for
+  // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/3)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the proto
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ document_log_size_with_compression_3 =
+ filesystem_.GetFileSize(file_path_.c_str());
+ }
+
+ // Delete the proto_log so we can reuse the file_path
+ filesystem_.DeleteFile(file_path_.c_str());
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_,
+ /*compression_level_in=*/0)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the proto
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ document_log_size_with_no_compression =
+ filesystem_.GetFileSize(file_path_.c_str());
+
+    // The uncompressed document log should be larger than the original
+    // compressed document log.
+ ASSERT_GT(document_log_size_with_no_compression,
+ document_log_size_with_compression_3);
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+ }
+
+ int corrupt_checksum = 24;
+
+ // Write the corrupted header
+ Header header = ReadHeader(filesystem_, file_path_);
+ header.SetHeaderChecksum(corrupt_checksum);
+ WriteHeader(filesystem_, file_path_, header);
+
+ {
+ // Reinitialize the same proto_log
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Invalid header checksum")));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+
+ // Corrupt the magic that's stored at the beginning of the header.
+ int invalid_magic = -1;
+ ASSERT_THAT(invalid_magic, Not(Eq(Header::kMagic)));
+
+ // Write the corrupted header
+ Header header = ReadHeader(filesystem_, file_path_);
+ header.SetMagic(invalid_magic);
+ WriteHeader(filesystem_, file_path_, header);
+ }
+
+ {
+ // Reinitialize the same proto_log
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Invalid header kMagic")));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ UnableToDetectCorruptContentWithoutDirtyBit) {
+  // It is intentional that we can't detect this corruption. We're trading
+  // earlier corruption detection for lower initialization latency: by not
+  // recalculating the checksum on initialization, we can initialize much
+  // faster, at the cost of not catching corruption up front. Note that even
+  // if we did detect corruption, there would be nothing we could do except
+  // return an error to clients. We'll still do that, but at some later
+  // point, when the log is accessed and we can't actually deserialize a
+  // proto from it. See the description in cl/374278280 for more details.
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+    // Write and persist a document.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
+ proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // "Corrupt" the content written in the log.
+ document.set_uri("invalid");
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), document_offset,
+ serialized_document.data(),
+ serialized_document.size()));
+ }
+
+ {
+ // We can recover, and we don't have data loss.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_FALSE(create_result.recalculated_checksum);
+
+ // We still have the corrupted content in our file, we didn't throw
+ // everything out.
+ EXPECT_THAT(
+ filesystem_.GetFileSize(file_path_.c_str()),
+ Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ DetectAndThrowOutCorruptContentWithDirtyBit) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .AddStringProperty("string_property", "foo", "bar")
+ .Build();
+
+    // Write the proto; the destructor will persist it to disk implicitly.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
+ proto_log->WriteProto(document));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document_offset),
+ IsOkAndHolds(EqualsProto(document)));
+ }
+
+ {
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem_.PWrite(
+ file_path_.c_str(),
+ PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes,
+ serialized_document.data(), serialized_document.size()));
+
+ Header header = ReadHeader(filesystem_, file_path_);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteHeader(filesystem_, file_path_, header);
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_TRUE(create_result.has_data_loss());
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+
+ // We had to recalculate the checksum to detect the corruption.
+ EXPECT_TRUE(create_result.recalculated_checksum);
+
+ // We lost everything, file size is back down to the header.
+ EXPECT_THAT(
+ filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
+
+ // At least the log is no longer dirty.
+ Header header = ReadHeader(filesystem_, file_path_);
+ EXPECT_FALSE(header.GetDirtyFlag());
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+ int64_t document_offset;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write and persist the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(document_offset,
+ proto_log->WriteProto(document));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document_offset),
+ IsOkAndHolds(EqualsProto(document)));
+ }
+
+ {
+ Header header = ReadHeader(filesystem_, file_path_);
+
+    // Simulate the dirty flag being set to true even though no data has
+    // changed yet, e.g. we crashed between writing the dirty flag and
+    // erasing a proto.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteHeader(filesystem_, file_path_, header);
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+
+ // Even though nothing changed, the false alarm dirty bit should have
+ // triggered us to recalculate our checksum.
+ EXPECT_TRUE(create_result.recalculated_checksum);
+
+ // Check that our document still exists even though dirty bit was true.
+ EXPECT_THAT(proto_log->ReadProto(document_offset),
+ IsOkAndHolds(EqualsProto(document)));
+
+ Header header = ReadHeader(filesystem_, file_path_);
+ EXPECT_FALSE(header.GetDirtyFlag());
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ PersistToDiskKeepsPersistedDataAndTruncatesExtraData) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace2", "uri2").Build();
+ int document1_offset, document2_offset;
+ int log_size;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write and persist the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // Write, but don't explicitly persist the second proto
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ proto_log->WriteProto(document2));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ log_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_GT(log_size, 0);
+
+ // PersistToDisk happens implicitly during the destructor.
+ }
+
+ {
+ // The header rewind position and checksum aren't updated in this "system
+ // crash" scenario.
+
+ std::string bad_proto =
+ "some incomplete proto that we didn't finish writing before the "
+ "system crashed";
+ filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(),
+ bad_proto.size());
+
+ // Double check that we actually wrote something to the underlying file
+ ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size);
+ }
+
+ {
+ // We can recover, but we have data loss
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_TRUE(create_result.has_data_loss());
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+ ASSERT_FALSE(create_result.recalculated_checksum);
+
+ // Check that everything was persisted across instances
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // We correctly rewound to the last good state.
+ ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str()));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ DirtyBitIsFalseAfterPutAndPersistToDisk) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ // Write and persist the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
+ proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document_offset),
+ IsOkAndHolds(EqualsProto(document)));
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+
+ // We previously persisted to disk so everything should be in a perfect
+ // state.
+ EXPECT_FALSE(create_result.has_data_loss());
+ EXPECT_FALSE(create_result.recalculated_checksum);
+
+ Header header = ReadHeader(filesystem_, file_path_);
+ EXPECT_FALSE(header.GetDirtyFlag());
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ DirtyBitIsFalseAfterDeleteAndPersistToDisk) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ // Write, delete, and persist the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
+ proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->EraseProto(document_offset));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // The proto has been erased.
+ ASSERT_THAT(proto_log->ReadProto(document_offset),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+
+ // We previously persisted to disk so everything should be in a perfect
+ // state.
+ EXPECT_FALSE(create_result.has_data_loss());
+ EXPECT_FALSE(create_result.recalculated_checksum);
+
+ Header header = ReadHeader(filesystem_, file_path_);
+ EXPECT_FALSE(header.GetDirtyFlag());
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+    // Write the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
+ proto_log->WriteProto(document));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document_offset),
+ IsOkAndHolds(EqualsProto(document)));
+
+ // PersistToDisk is implicitly called as part of the destructor and
+ // PersistToDisk will clear the dirty bit.
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+
+ // We previously persisted to disk so everything should be in a perfect
+ // state.
+ EXPECT_FALSE(create_result.has_data_loss());
+ EXPECT_FALSE(create_result.recalculated_checksum);
+
+ Header header = ReadHeader(filesystem_, file_path_);
+ EXPECT_FALSE(header.GetDirtyFlag());
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest,
+ DirtyBitIsFalseAfterDeleteAndDestructor) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+    // Write and delete the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
+ proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->EraseProto(document_offset));
+
+ // The proto has been erased.
+ ASSERT_THAT(proto_log->ReadProto(document_offset),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // PersistToDisk is implicitly called as part of the destructor and
+ // PersistToDisk will clear the dirty bit.
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+
+ // We previously persisted to disk so everything should be in a perfect
+ // state.
+ EXPECT_FALSE(create_result.has_data_loss());
+ EXPECT_FALSE(create_result.recalculated_checksum);
+
+ Header header = ReadHeader(filesystem_, file_path_);
+ EXPECT_FALSE(header.GetDirtyFlag());
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, Iterator) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ {
+ // Empty iterator
+ auto iterator = proto_log->GetIterator();
+ ASSERT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ }
+
+ {
+ // Iterates through some documents
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->WriteProto(document2));
+ auto iterator = proto_log->GetIterator();
+ // 1st proto
+ ICING_ASSERT_OK(iterator.Advance());
+ ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
+ IsOkAndHolds(EqualsProto(document1)));
+ // 2nd proto
+ ICING_ASSERT_OK(iterator.Advance());
+ ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
+ IsOkAndHolds(EqualsProto(document2)));
+ // Tries to advance
+ ASSERT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ }
+
+ {
+ // Iterator with bad filesystem
+ ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str()));
+ MockFilesystem mock_filesystem;
+ ON_CALL(mock_filesystem, GetFileSize(A<int>()))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
+ PortableFileBackedProtoLog<DocumentProto>::Iterator bad_iterator(
+ mock_filesystem, sfd.get(), /*initial_offset=*/0);
+ ASSERT_THAT(bad_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) {
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ Crc32 checksum;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ ICING_EXPECT_OK(proto_log->WriteProto(document));
+
+ ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->ComputeChecksum());
+
+ // Calling it twice with no changes should get us the same checksum
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Checksum should be consistent across instances
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+
+ // PersistToDisk shouldn't affect the checksum value
+ ICING_EXPECT_OK(proto_log->PersistToDisk());
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+
+ // Check that modifying the log leads to a different checksum
+ ICING_EXPECT_OK(proto_log->WriteProto(document));
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Writes and erases proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ // Checks if the erased area is set to 0.
+ int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY));
+
+  // document1_offset + sizeof(int) is the first byte of the proto itself,
+  // where sizeof(int) is the size of the proto metadata.
+ ICING_ASSERT_OK(
+ mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1));
+ for (size_t i = 0; i < mmapped_file.region_size(); ++i) {
+ ASSERT_THAT(mmapped_file.region()[i], Eq(0));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Writes 2 protos
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document2_offset,
+ proto_log->WriteProto(document2));
+
+ // Erases the first proto
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ // The first proto has been erased.
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // The second proto should be returned.
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace", "uri3").Build();
+ DocumentProto document4 =
+ DocumentBuilder().SetKey("namespace", "uri4").Build();
+
+ int64_t document2_offset;
+ int64_t document3_offset;
+
+ {
+ // Erase data after the rewind position. This won't update the checksum
+ // immediately.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Writes 3 protos
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ proto_log->WriteProto(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
+ proto_log->WriteProto(document3));
+
+ // Erases the 1st proto, checksum won't be updated immediately because the
+ // rewind position is 0.
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(2175574628))));
+ } // New checksum is updated in destructor.
+
+ {
+ // Erase data before the rewind position. This will update the checksum
+ // immediately.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Erases the 2nd proto that is now before the rewind position. Checksum
+ // is updated.
+ ICING_ASSERT_OK(proto_log->EraseProto(document2_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(790877774))));
+ }
+
+ {
+ // Append data and erase data before the rewind position. This will update
+ // the checksum twice: in EraseProto() and destructor.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Append a new document which is after the rewind position.
+ ICING_ASSERT_OK(proto_log->WriteProto(document4));
+
+ // Erases the 3rd proto that is now before the rewind position. Checksum
+ // is updated.
+ ICING_ASSERT_OK(proto_log->EraseProto(document3_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(2344803210))));
+ } // Checksum is updated with the newly appended document.
+
+ {
+ // A successful creation means that the checksum matches.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_, compression_level_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/flash-index-storage-header.h b/icing/file/posting_list/flash-index-storage-header.h
new file mode 100644
index 0000000..6bbf1ba
--- /dev/null
+++ b/icing/file/posting_list/flash-index-storage-header.h
@@ -0,0 +1,122 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_FLASH_INDEX_STORAGE_HEADER_H_
+#define ICING_FILE_POSTING_LIST_FLASH_INDEX_STORAGE_HEADER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+
+namespace icing {
+namespace lib {
+
+// The class used to manage the flash block that contains the header for
+// FlashIndexStorage. This contains information about the index blocks that
+// store the posting lists.
+class HeaderBlock {
+ public:
+ // The class used to access the actual header.
+ struct Header {
+ // A magic used to mark the beginning of a valid header.
+ static constexpr int kMagic = 0xb0780cf4;
+ int magic;
+ int block_size;
+ int last_indexed_docid;
+ // The size of the index_block_infos array.
+ int num_index_block_infos;
+
+ struct IndexBlockInfo {
+ // The size of the posting lists that fit on all the index blocks in this
+ // chain. Each block on this posting list will have posting lists of size
+ // posting_list_bytes.
+ int posting_list_bytes;
+ // The block index of the first block in the free list chain.
+ int free_list_block_index;
+ };
+ // Variable-size array, num_index_block_infos long. Can have a max length
+ // of log(block_size). This array is used to maintain a free list for the
+ // available blocks.
+ IndexBlockInfo index_block_infos[0];
+ };
+
+ // Read HeaderBlock from the specified fd.
+ //
+ // RETURNS:
+ // - HeaderBlock, on success
+ // - INTERNAL if unable to read block_size bytes from fd.
+ static libtextclassifier3::StatusOr<HeaderBlock> Read(
+ const Filesystem* filesystem, int fd, int block_size) {
+ std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(block_size);
+ if (!filesystem->PRead(fd, buffer.get(), block_size, 0)) {
+ return absl_ports::InternalError("Unable to reader header block!");
+ }
+ return HeaderBlock(filesystem, std::move(buffer), block_size);
+ }
+
+ // Make a new HeaderBlock with the specified size.
+ explicit HeaderBlock(const Filesystem* filesystem, int block_size)
+ : HeaderBlock(filesystem, std::make_unique<uint8_t[]>(block_size),
+ block_size) {
+ std::memset(header_buffer_.get(), 0, block_size);
+ }
+
+ Header* header() const {
+ return reinterpret_cast<Header*>(header_buffer_.get());
+ }
+
+ // Add another entry to the index_block_infos array and return a pointer to
+ // that entry. Returns a nullptr if the index_block_infos array is already
+ // at a max size.
+ Header::IndexBlockInfo* AddIndexBlockInfo() {
+ if (size() + sizeof(Header::IndexBlockInfo) > block_size_) {
+ return nullptr;
+ }
+ ++header()->num_index_block_infos;
+ return header()->index_block_infos + (header()->num_index_block_infos - 1);
+ }
+
+ // Returns the size of the header block currently in use.
+ int size() const {
+ return sizeof(Header) +
+ header()->num_index_block_infos * sizeof(Header::IndexBlockInfo);
+ }
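+  // For example (illustrative arithmetic only): the fixed Header is 16 bytes
+  // (four ints, per the static_assert below) and each IndexBlockInfo is 8
+  // bytes (two ints), so a header holding 3 infos occupies 16 + 3 * 8 = 40
+  // bytes of the block.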
+
+ // Writes the header to fd. Returns true on success.
+ bool Write(int fd) {
+ return filesystem_->PWrite(fd, 0, header_buffer_.get(), block_size_);
+ }
+
+ private:
+ explicit HeaderBlock(const Filesystem* filesystem,
+ std::unique_ptr<uint8_t[]> buffer, int block_size)
+ : filesystem_(filesystem),
+ header_buffer_(std::move(buffer)),
+ block_size_(block_size) {}
+
+ const Filesystem* filesystem_; // does NOT own!
+ std::unique_ptr<uint8_t[]> header_buffer_;
+ int block_size_;
+};
+static_assert(16 == sizeof(HeaderBlock::Header),
+ "Header has changed size. Consider how this change might affect "
+ "pre-existing indices.");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_FLASH_INDEX_STORAGE_HEADER_H_
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
new file mode 100644
index 0000000..2198d2c
--- /dev/null
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -0,0 +1,661 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/flash-index-storage.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cerrno>
+#include <cinttypes>
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/math-util.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
+ std::string index_filename, const Filesystem* filesystem,
+ PostingListSerializer* serializer, bool in_memory) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(serializer);
+
+ FlashIndexStorage storage(filesystem, std::move(index_filename), serializer,
+ in_memory);
+ if (!storage.Init()) {
+ return absl_ports::InternalError(
+ "Unable to successfully read header block!");
+ }
+ return storage;
+}
+
+/* static */ libtextclassifier3::StatusOr<int>
+FlashIndexStorage::ReadHeaderMagic(const Filesystem* filesystem,
+ const std::string& index_filename) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+
+ if (!filesystem->FileExists(index_filename.c_str())) {
+ return absl_ports::NotFoundError("Flash index file doesn't exist");
+ }
+
+ ScopedFd sfd(filesystem->OpenForRead(index_filename.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open flash index file");
+ }
+
+ uint32_t block_size = SelectBlockSize();
+ // Read and validate header.
+ ICING_ASSIGN_OR_RETURN(HeaderBlock header_block,
+ HeaderBlock::Read(filesystem, sfd.get(), block_size));
+ return header_block.header()->magic;
+}
+
+FlashIndexStorage::~FlashIndexStorage() {
+ if (header_block_ != nullptr) {
+ libtextclassifier3::Status status = FlushInMemoryFreeList();
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << "Cannot flush in memory free list: "
+ << status.error_message();
+ }
+ PersistToDisk();
+ }
+}
+
+/* static */ uint32_t FlashIndexStorage::SelectBlockSize() {
+ // This should be close to the flash page size.
+ static constexpr uint32_t kMinBlockSize = 4096;
+
+ // Determine a good block size.
+ uint32_t page_size = getpagesize();
+ uint32_t block_size = std::max(kMinBlockSize, page_size);
+
+ // Align up to the nearest page size.
+ return math_util::RoundUpTo(block_size, page_size);
+}
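+
+// Worked example for SelectBlockSize (illustrative values): with a common
+// 4 KiB page size, block_size = max(4096, 4096) = 4096 and rounding up to the
+// page size leaves it at 4096; on a 16 KiB-page system the result is 16384.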
+
+bool FlashIndexStorage::Init() {
+ storage_sfd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
+ if (!storage_sfd_.is_valid()) {
+ return false;
+ }
+
+ // Read in or create the header.
+ return InitHeader();
+}
+
+bool FlashIndexStorage::InitHeader() {
+ // Look for an existing file size.
+ int64_t file_size = filesystem_->GetFileSize(storage_sfd_.get());
+ if (file_size == Filesystem::kBadFileSize) {
+ ICING_LOG(ERROR) << "Could not initialize main index. Bad file size.";
+ return false;
+ }
+
+ if (file_size == 0) {
+ if (!CreateHeader()) {
+ ICING_LOG(ERROR)
+ << "Could not initialize main index. Unable to create header.";
+ return false;
+ }
+ } else {
+ if (!OpenHeader(file_size)) {
+ ICING_LOG(ERROR)
+ << "Could not initialize main index. Unable to open header.";
+ return false;
+ }
+ }
+ in_memory_freelists_.resize(header_block_->header()->num_index_block_infos);
+
+ return true;
+}
+
+bool FlashIndexStorage::CreateHeader() {
+ uint32_t block_size = SelectBlockSize();
+ header_block_ = std::make_unique<HeaderBlock>(filesystem_, block_size);
+ // Initialize.
+ header_block_->header()->magic = HeaderBlock::Header::kMagic;
+ header_block_->header()->block_size = block_size;
+ header_block_->header()->last_indexed_docid = kInvalidDocumentId;
+
+ // Work down from the largest posting list that fits in
+ // block_size. We don't care about locality of blocks because this
+ // is a flash index.
+ for (uint32_t posting_list_bytes = max_posting_list_bytes();
+ posting_list_bytes >= serializer_->GetMinPostingListSize();
+ posting_list_bytes /= 2) {
+ uint32_t aligned_posting_list_bytes =
+ (posting_list_bytes / serializer_->GetDataTypeBytes()) *
+ serializer_->GetDataTypeBytes();
+ ICING_VLOG(1) << "Block size "
+ << header_block_->header()->num_index_block_infos << ": "
+ << aligned_posting_list_bytes;
+
+ // Initialize free list to empty.
+ HeaderBlock::Header::IndexBlockInfo* block_info =
+ header_block_->AddIndexBlockInfo();
+ if (block_info == nullptr) {
+      // This should never happen anyway. The min block size is 4k, so adding
+      // these IndexBlockInfos should never exceed the block size.
+ return false;
+ }
+ block_info->posting_list_bytes = aligned_posting_list_bytes;
+ block_info->free_list_block_index = kInvalidBlockIndex;
+ }
+
+ // Write the header.
+ if (!header_block_->Write(storage_sfd_.get())) {
+ filesystem_->Truncate(storage_sfd_.get(), 0);
+ return false;
+ }
+ num_blocks_ = 1;
+ return true;
+}
+
+bool FlashIndexStorage::OpenHeader(int64_t file_size) {
+ uint32_t block_size = SelectBlockSize();
+ // Read and validate header.
+ ICING_ASSIGN_OR_RETURN(
+ HeaderBlock read_header,
+ HeaderBlock::Read(filesystem_, storage_sfd_.get(), block_size), false);
+ if (read_header.header()->magic != HeaderBlock::Header::kMagic) {
+ ICING_LOG(ERROR) << "Index header block wrong magic";
+ return false;
+ }
+ if (file_size % read_header.header()->block_size != 0) {
+ ICING_LOG(ERROR) << "Index size " << file_size
+ << " not a multiple of block size "
+ << read_header.header()->block_size;
+ return false;
+ }
+
+ if (file_size < static_cast<int64_t>(read_header.header()->block_size)) {
+ ICING_LOG(ERROR) << "Index size " << file_size
+ << " shorter than block size "
+ << read_header.header()->block_size;
+ return false;
+ }
+
+ if (read_header.header()->block_size % getpagesize() != 0) {
+ ICING_LOG(ERROR) << "Block size " << read_header.header()->block_size
+ << " is not a multiple of page size " << getpagesize();
+ return false;
+ }
+ num_blocks_ = file_size / read_header.header()->block_size;
+ if (block_size != read_header.header()->block_size) {
+ // The block_size changed? That's weird. But the old block_size is still
+ // valid (it must be some multiple of the new block_size). So reinitialize
+ // with that old block size. Using the old block size means that we can
+ // still use the main index, but reads/writes won't be as efficient in terms
+ // of flash IO because the 'blocks' that we're reading are actually multiple
+ // pages long.
+ ICING_LOG(ERROR) << "Block size of existing header ("
+ << read_header.header()->block_size
+ << ") does not match the requested block size ("
+ << block_size << "). Defaulting to existing block size "
+ << read_header.header()->block_size;
+    ICING_ASSIGN_OR_RETURN(read_header,
+                           HeaderBlock::Read(filesystem_, storage_sfd_.get(),
+                                             read_header.header()->block_size),
+                           false);
+ }
+ header_block_ = std::make_unique<HeaderBlock>(std::move(read_header));
+
+ // Check for memory alignment on posting_list_bytes. See b/29983315.
+ // The issue of potential corruption to the header could also be handled by
+ // checksumming the header block.
+ for (int i = 0; i < header_block_->header()->num_index_block_infos; ++i) {
+ int posting_list_bytes =
+ header_block_->header()->index_block_infos[i].posting_list_bytes;
+ if (posting_list_bytes % serializer_->GetDataTypeBytes() != 0) {
+ ICING_LOG(ERROR)
+ << "Posting list size misaligned, index " << i << ", size "
+ << header_block_->header()->index_block_infos[i].posting_list_bytes
+ << ", data_type_bytes " << serializer_->GetDataTypeBytes()
+ << ", file_size " << file_size;
+ return false;
+ }
+ }
+ return true;
+}
+
+bool FlashIndexStorage::PersistToDisk() {
+ // First, write header.
+ if (!header_block_->Write(storage_sfd_.get())) {
+ ICING_LOG(ERROR) << "Write index header failed: " << strerror(errno);
+ return false;
+ }
+
+ // Then sync.
+ return filesystem_->DataSync(storage_sfd_.get());
+}
+
+libtextclassifier3::Status FlashIndexStorage::Reset() {
+ // Reset in-memory members to default values.
+ num_blocks_ = 0;
+ header_block_.reset();
+ storage_sfd_.reset();
+ in_memory_freelists_.clear();
+
+ // Delete the underlying file.
+ if (!filesystem_->DeleteFile(index_filename_.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Unable to delete file: ", index_filename_));
+ }
+
+ // Re-initialize.
+ if (!Init()) {
+ return absl_ports::InternalError(
+ "Unable to successfully read header block!");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::GetPostingList(PostingListIdentifier id) const {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(id.block_index()));
+ ICING_ASSIGN_OR_RETURN(
+ IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.GetAllocatedPostingList(id.posting_list_index()));
+ return PostingListHolder(std::move(pl_block_info.posting_list_used), id,
+ pl_block_info.next_block_index);
+}
+
+libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::GetIndexBlock(
+ uint32_t block_index) const {
+ if (block_index >= num_blocks_) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Unable to create an index block at index %" PRIu32
+ " when only %d blocks have been allocated.",
+ block_index, num_blocks_));
+ }
+ off_t offset = static_cast<off_t>(block_index) * block_size();
+ return IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ filesystem_, serializer_, storage_sfd_.get(), offset, block_size());
+}
+
+libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::CreateIndexBlock(
+ uint32_t block_index, uint32_t posting_list_size) const {
+ if (block_index >= num_blocks_) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Unable to create an index block at index %" PRIu32
+ " when only %d blocks have been allocated.",
+ block_index, num_blocks_));
+ }
+ off_t offset = static_cast<off_t>(block_index) * block_size();
+ return IndexBlock::CreateFromUninitializedRegion(
+ filesystem_, serializer_, storage_sfd_.get(), offset, block_size(),
+ posting_list_size);
+}
+
+int FlashIndexStorage::FindBestIndexBlockInfo(
+ uint32_t posting_list_bytes) const {
+ int i = header_block_->header()->num_index_block_infos - 1;
+ for (; i >= 0; i--) {
+ if (header_block_->header()->index_block_infos[i].posting_list_bytes >=
+ posting_list_bytes) {
+ return i;
+ }
+ }
+ return i;
+}
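+
+// Example (illustrative sizes): if index_block_infos holds posting list sizes
+// {2048, 1024, 512} at indices {0, 1, 2} (CreateHeader() fills the array in
+// descending order), a request for 600 bytes scans from the back: 512 is too
+// small, 1024 fits, so index 1 is returned. A request larger than index 0's
+// size falls off the front of the loop and returns -1.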
+
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::GetPostingListFromInMemoryFreeList(int block_info_index) {
+ // Get something from in memory free list.
+ ICING_ASSIGN_OR_RETURN(PostingListIdentifier posting_list_id,
+ in_memory_freelists_[block_info_index].TryPop());
+ // Remember, posting lists stored on the in-memory free list were never
+ // actually freed. So it will still contain a valid PostingListUsed. First, we
+ // need to free this posting list.
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(posting_list_id.block_index()));
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(posting_list_id.posting_list_index()));
+
+ // Now, we can allocate a posting list from the same index block. It may not
+ // be the same posting list that was just freed, but that's okay.
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.AllocatePostingList());
+ posting_list_id = PostingListIdentifier(
+ posting_list_id.block_index(), pl_block_info.posting_list_index,
+ posting_list_id.posting_list_index_bits());
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
+}
+
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::GetPostingListFromOnDiskFreeList(int block_info_index) {
+ // Get something from the free list.
+ uint32_t block_index = header_block_->header()
+ ->index_block_infos[block_info_index]
+ .free_list_block_index;
+ if (block_index == kInvalidBlockIndex) {
+ return absl_ports::NotFoundError("No available entry in free list.");
+ }
+
+ // Get the index block
+ ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(block_index));
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.AllocatePostingList());
+ PostingListIdentifier posting_list_id =
+ PostingListIdentifier(block_index, pl_block_info.posting_list_index,
+ block.posting_list_index_bits());
+ if (!pl_block_info.has_free_posting_lists) {
+ ICING_RETURN_IF_ERROR(
+ RemoveFromOnDiskFreeList(block_index, block_info_index, &block));
+ }
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
+}
+
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::AllocateNewPostingList(int block_info_index) {
+ uint32_t block_index = GrowIndex();
+ if (block_index == kInvalidBlockIndex) {
+ return absl_ports::ResourceExhaustedError(
+ "Unable to grow the index further!");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ IndexBlock block,
+ CreateIndexBlock(block_index, header_block_->header()
+ ->index_block_infos[block_info_index]
+ .posting_list_bytes));
+ ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.AllocatePostingList());
+ PostingListIdentifier posting_list_id =
+ PostingListIdentifier(block_index, pl_block_info.posting_list_index,
+ block.posting_list_index_bits());
+ if (pl_block_info.has_free_posting_lists) {
+ AddToOnDiskFreeList(block_index, block_info_index, &block);
+ }
+
+ return PostingListHolder(std::move(pl_block_info.posting_list_used),
+ posting_list_id, pl_block_info.next_block_index);
+}
+
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::AllocatePostingList(uint32_t min_posting_list_bytes) {
+ int max_pl_size = max_posting_list_bytes();
+ if (min_posting_list_bytes > max_pl_size) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Requested posting list size %d exceeds max posting list size %d",
+ min_posting_list_bytes, max_pl_size));
+ }
+ int best_block_info_index = FindBestIndexBlockInfo(min_posting_list_bytes);
+
+ auto holder_or = GetPostingListFromInMemoryFreeList(best_block_info_index);
+ if (holder_or.ok()) {
+ return std::move(holder_or).ValueOrDie();
+ }
+
+ // Nothing in memory. Look for something in the block file.
+ holder_or = GetPostingListFromOnDiskFreeList(best_block_info_index);
+ if (holder_or.ok()) {
+ return std::move(holder_or).ValueOrDie();
+ }
+
+ return AllocateNewPostingList(best_block_info_index);
+}
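+
+// A minimal caller sketch (illustrative only; `storage` is assumed to be an
+// initialized FlashIndexStorage and 64 an acceptable minimum size):
+//
+//   ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+//                          storage.AllocatePostingList(
+//                              /*min_posting_list_bytes=*/64));
+//   // ... mutate holder.posting_list via the serializer ...
+//   ICING_RETURN_IF_ERROR(storage.WritePostingListToDisk(holder));
+//   ICING_RETURN_IF_ERROR(storage.FreePostingList(std::move(holder)));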
+
+libtextclassifier3::StatusOr<PostingListHolder>
+FlashIndexStorage::AllocateAndChainMaxSizePostingList(
+ uint32_t prev_block_index) {
+ uint32_t max_pl_size = max_posting_list_bytes();
+ int best_block_info_index = FindBestIndexBlockInfo(max_pl_size);
+
+ auto holder_or = GetPostingListFromInMemoryFreeList(best_block_info_index);
+ if (!holder_or.ok()) {
+ // Nothing in memory. Look for something in the block file.
+ holder_or = GetPostingListFromOnDiskFreeList(best_block_info_index);
+ }
+
+ if (!holder_or.ok()) {
+ // Nothing in memory or block file. Allocate new block and posting list.
+ holder_or = AllocateNewPostingList(best_block_info_index);
+ }
+
+ if (!holder_or.ok()) {
+ return holder_or;
+ }
+
+ PostingListHolder holder = std::move(holder_or).ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(prev_block_index));
+ holder.next_block_index = prev_block_index;
+ return holder;
+}
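+
+// Chaining example (illustrative block numbers): if block 7 currently heads a
+// chain of max-sized posting lists, AllocateAndChainMaxSizePostingList(7)
+// allocates a new block (say, block 9), sets block 9's next_block_index to 7,
+// and returns a holder whose next_block_index is 7; block 9 becomes the new
+// chain head.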
+
+void FlashIndexStorage::AddToOnDiskFreeList(uint32_t block_index,
+ int block_info_index,
+ IndexBlock* index_block) {
+ libtextclassifier3::Status status =
+ index_block->SetNextBlockIndex(header_block_->header()
+ ->index_block_infos[block_info_index]
+ .free_list_block_index);
+ if (!status.ok()) {
+ // If an error occurs, then simply skip this block. It just prevents us from
+ // allocating posting lists from this free block in the future and thus
+ // wastes at most one block, but the entire storage (including the
+ // FlashIndexStorage header) is still valid. Therefore, we can swallow
+ // errors here.
+ ICING_VLOG(1) << "Fail to set next block index to chain blocks with free "
+ "lists on disk: "
+ << status.error_message();
+ return;
+ }
+
+ header_block_->header()
+ ->index_block_infos[block_info_index]
+ .free_list_block_index = block_index;
+}
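+
+// Free list chaining example (illustrative block numbers): suppose the header
+// free list for this size currently starts at block 3. Adding block 9 first
+// sets block 9's next block index to 3, then points the header's
+// free_list_block_index at 9, so the chain becomes 9 -> 3 -> ...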
+
+libtextclassifier3::Status FlashIndexStorage::RemoveFromOnDiskFreeList(
+ uint32_t block_index, int block_info_index, IndexBlock* index_block) {
+ // Cannot be used anymore. Move free ptr to the next block.
+ ICING_ASSIGN_OR_RETURN(uint32_t next_block_index,
+ index_block->GetNextBlockIndex());
+ ICING_RETURN_IF_ERROR(index_block->SetNextBlockIndex(kInvalidBlockIndex));
+ header_block_->header()
+ ->index_block_infos[block_info_index]
+ .free_list_block_index = next_block_index;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FlashIndexStorage::FreePostingList(
+ PostingListHolder&& holder) {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ if (block.posting_list_bytes() == max_posting_list_bytes()) {
+ ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(kInvalidBlockIndex));
+ }
+
+ uint32_t posting_list_bytes = block.posting_list_bytes();
+ int best_block_info_index = FindBestIndexBlockInfo(posting_list_bytes);
+
+ // It *should* be guaranteed elsewhere that FindBestIndexBlockInfo will not
+  // return a value >= in_memory_freelists_.size(), but check regardless. If
+  // it doesn't fit for some reason, then put it in the Header free list
+  // instead.
+ if (has_in_memory_freelists_ &&
+ best_block_info_index < in_memory_freelists_.size()) {
+ in_memory_freelists_[best_block_info_index].Push(holder.id);
+ } else {
+ ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(holder.id.posting_list_index()));
+ // If this block was not already full, then it is already in the free list.
+ if (!was_not_full) {
+ AddToOnDiskFreeList(holder.id.block_index(), best_block_info_index,
+ &block);
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FlashIndexStorage::WritePostingListToDisk(
+ const PostingListHolder& holder) {
+ ICING_ASSIGN_OR_RETURN(IndexBlock block,
+ GetIndexBlock(holder.id.block_index()));
+ return block.WritePostingListToDisk(holder.posting_list,
+ holder.id.posting_list_index());
+}
+
+int FlashIndexStorage::GrowIndex() {
+ if (num_blocks_ >= kMaxBlockIndex) {
+ ICING_VLOG(1) << "Reached max block index " << kMaxBlockIndex;
+ return kInvalidBlockIndex;
+ }
+
+ // Grow the index file.
+ if (!filesystem_->Grow(
+ storage_sfd_.get(),
+ static_cast<uint64_t>(num_blocks_ + 1) * block_size())) {
+ ICING_VLOG(1) << "Error growing index file: " << strerror(errno);
+ return kInvalidBlockIndex;
+ }
+
+ return num_blocks_++;
+}
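+
+// Example (illustrative): with a 4096-byte block size and num_blocks_ == 3,
+// GrowIndex() extends the file to 4 * 4096 = 16384 bytes and returns 3, the
+// index of the newly added block.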
+
+libtextclassifier3::Status FlashIndexStorage::FlushInMemoryFreeList() {
+ for (int i = 0; i < in_memory_freelists_.size(); ++i) {
+ FreeList& freelist = in_memory_freelists_.at(i);
+ auto freelist_elt_or = freelist.TryPop();
+ while (freelist_elt_or.ok()) {
+ PostingListIdentifier freelist_elt = freelist_elt_or.ValueOrDie();
+ // Remember, posting lists stored on the in-memory free list were never
+ // actually freed. So it will still contain a valid PostingListUsed.
+ // First, we need to free this posting list.
+ auto block_or = GetIndexBlock(freelist_elt.block_index());
+ if (!block_or.ok()) {
+ // Can't read the block. Nothing to do here. This posting list will have
+ // to leak. Just proceed to the next freelist element.
+ freelist_elt_or = freelist.TryPop();
+ continue;
+ }
+ IndexBlock block = std::move(block_or).ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
+ ICING_RETURN_IF_ERROR(
+ block.FreePostingList(freelist_elt.posting_list_index()));
+ // If this block was not already full, then it is already in the free
+ // list.
+ if (!was_not_full) {
+ AddToOnDiskFreeList(freelist_elt.block_index(), /*block_info_index=*/i,
+ &block);
+ }
+ freelist_elt_or = freelist.TryPop();
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
+ std::string* out) const {
+ // Dump and check integrity of the index block free lists.
+ out->append("Free lists:\n");
+ for (size_t i = 0; i < header_block_->header()->num_index_block_infos; ++i) {
+ // TODO(tjbarron) Port over StringAppendFormat to migrate off of this legacy
+ // util.
+ IcingStringUtil::SStringAppendF(
+ out, 100, "Posting list bytes %u: ",
+ header_block_->header()->index_block_infos[i].posting_list_bytes);
+ uint32_t block_index =
+ header_block_->header()->index_block_infos[i].free_list_block_index;
+ int count = 0;
+ while (block_index != kInvalidBlockIndex) {
+ auto block_or = GetIndexBlock(block_index);
+ IcingStringUtil::SStringAppendF(out, 100, "%u ", block_index);
+ ++count;
+
+ block_index = kInvalidBlockIndex;
+ if (block_or.ok()) {
+ auto block_index_or = block_or.ValueOrDie().GetNextBlockIndex();
+ if (block_index_or.ok()) {
+ block_index = block_index_or.ValueOrDie();
+ }
+ }
+ }
+ IcingStringUtil::SStringAppendF(out, 100, "(count=%d)\n", count);
+ }
+
+ out->append("In memory free lists:\n");
+ if (in_memory_freelists_.size() ==
+ header_block_->header()->num_index_block_infos) {
+ for (size_t i = 0; i < in_memory_freelists_.size(); ++i) {
+ IcingStringUtil::SStringAppendF(
+ out, 100, "Posting list bytes %u %s\n",
+ header_block_->header()->index_block_infos[i].posting_list_bytes,
+ in_memory_freelists_.at(i).DebugString().c_str());
+ }
+ } else {
+ IcingStringUtil::SStringAppendF(
+ out, 100,
+ "In memory free list size %zu doesn't match index block infos size "
+ "%d\n",
+ in_memory_freelists_.size(),
+ header_block_->header()->num_index_block_infos);
+ }
+}
+
+// FreeList.
+void FlashIndexStorage::FreeList::Push(PostingListIdentifier id) {
+ if (free_list_.size() >= kMaxSize) {
+ ICING_LOG(WARNING)
+ << "Freelist for posting lists of size (block_size / "
+ << (1u << id.posting_list_index_bits())
+ << ") has reached max size. Dropping freed posting list [block_index:"
+ << id.block_index()
+ << ", posting_list_index:" << id.posting_list_index() << "]";
+ ++num_dropped_free_list_entries_;
+ return;
+ }
+
+ free_list_.push_back(id);
+ free_list_size_high_watermark_ = std::max(
+ free_list_size_high_watermark_, static_cast<int>(free_list_.size()));
+}
+
+libtextclassifier3::StatusOr<PostingListIdentifier>
+FlashIndexStorage::FreeList::TryPop() {
+ if (free_list_.empty()) {
+ return absl_ports::NotFoundError("No available entry in free list.");
+ }
+
+ PostingListIdentifier id = free_list_.back();
+ free_list_.pop_back();
+ return id;
+}
+
+std::string FlashIndexStorage::FreeList::DebugString() const {
+ return IcingStringUtil::StringPrintf(
+ "size %zu max %d dropped %d", free_list_.size(),
+ free_list_size_high_watermark_, num_dropped_free_list_entries_);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/flash-index-storage.h b/icing/file/posting_list/flash-index-storage.h
new file mode 100644
index 0000000..378b2dc
--- /dev/null
+++ b/icing/file/posting_list/flash-index-storage.h
@@ -0,0 +1,381 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_FLASH_INDEX_STORAGE_H_
+#define ICING_FILE_POSTING_LIST_FLASH_INDEX_STORAGE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// PostingListHolder: group PostingListUsed, id, and some other useful info for
+// callers.
+struct PostingListHolder {
+ // PostingListUsed owns an in-memory posting list data buffer. The data being
+ // interpreted is initialized via PRead from the storage. As such, we should
+ // sync it to disk after modifying it.
+ PostingListUsed posting_list;
+
+ // The PostingListIdentifier, which identifies both the block index and the
+ // posting list index on that block, is also returned for convenience.
+ PostingListIdentifier id;
+
+ // Next block index is also returned for convenience. If PostingListUsed is a
+ // max-sized posting list, then the caller has to use this value to handle
+ // chained max-sized posting list blocks.
+ uint32_t next_block_index;
+
+ explicit PostingListHolder(PostingListUsed&& posting_list_in,
+ PostingListIdentifier id_in,
+ uint32_t next_block_index_in)
+ : posting_list(std::move(posting_list_in)),
+ id(id_in),
+ next_block_index(next_block_index_in) {}
+};
+
+// The FlashIndexStorage class manages the actual file that makes up blocks for
+// posting lists. It allocates IndexBlocks as needed and maintains freelists to
+// prevent excessive block fragmentation.
+//
+// It maintains two types of free lists:
+// 1. On-disk, Header free list - This free list is stored in the Header
+// block. There is a free list for every possible posting list size. Each
+// entry for a posting list size contains the block_index of the
+// IndexBlock that starts the free list chain. Each IndexBlock in the free
+// list chain stores the index of the next IndexBlock in the chain.
+// 2. In-memory free list - Like the Header free list, there is a free list of
+// every possible posting list size. This free list contains not just the
+// block_index of the available IndexBlock, but also the posting_list_index
+// of the available PostingListUsed within the IndexBlock. This is because,
+// unlike the Header free list, PostingListUseds are not actually freed
+// when added to this free list.
+//
+// Whether or not the in-memory free list is used can be chosen via the
+// in_memory param to the Create factory function.
+//
+// The advantage of using the in-memory free list is that it reduces the amount
+// of flash writes made while editing the index (because actually freeing the
+// PostingLists would require writing to that flash block). The disadvantage is
+// that it introduces code complexity and potentially leaks blocks if power is
+// lost or if FlashIndexStorage is destroyed before emptying the free list.
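+//
+// A minimal creation sketch (illustrative only; the file path is an
+// assumption, and PostingListHitSerializer is one concrete serializer used in
+// the tests):
+//
+//   PostingListHitSerializer serializer;
+//   ICING_ASSIGN_OR_RETURN(
+//       FlashIndexStorage storage,
+//       FlashIndexStorage::Create("/tmp/flash_index", &filesystem,
+//                                 &serializer, /*in_memory=*/true));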
+class FlashIndexStorage {
+ public:
+ // Creates a FlashIndexStorage at index_filename. in_memory determines whether
+ // or not the FlashIndexStorage maintains an in-memory freelist in order to
+ // avoid writes to the on-disk freelist.
+ //
+ // RETURNS:
+ // - On success, a valid instance of FlashIndexStorage
+ // - FAILED_PRECONDITION_ERROR if filesystem or serializer is null
+ // - INTERNAL_ERROR if unable to create a new header or read the existing
+ // one from disk.
+ static libtextclassifier3::StatusOr<FlashIndexStorage> Create(
+ std::string index_filename, const Filesystem* filesystem,
+ PostingListSerializer* serializer, bool in_memory = true);
+
+ // Reads magic from existing file header. We need this during Icing
+ // initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic
+ // - FAILED_PRECONDITION_ERROR if filesystem is null
+ // - NOT_FOUND_ERROR if the flash index file doesn't exist
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<int> ReadHeaderMagic(
+ const Filesystem* filesystem, const std::string& index_filename);
+
+ FlashIndexStorage(FlashIndexStorage&&) = default;
+ FlashIndexStorage(const FlashIndexStorage&) = delete;
+ FlashIndexStorage& operator=(FlashIndexStorage&&) = default;
+ FlashIndexStorage& operator=(const FlashIndexStorage&) = delete;
+
+ ~FlashIndexStorage();
+
+ // Selects block size to use.
+ static uint32_t SelectBlockSize();
+
+ // Retrieves the PostingList referred to by PostingListIdentifier. This
+ // posting list must have been previously allocated by a prior call to
+ // AllocatePostingList.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListHolder containing the
+ // requested PostingListUsed.
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder> GetPostingList(
+ PostingListIdentifier id) const;
+
+ // Allocates and returns a PostingListHolder containing a PostingListUsed that
+ // can fit min_posting_list_bytes.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListHolder containing the
+ // requested PostingListUsed.
+ // - INVALID_ARGUMENT_ERROR if min_posting_list_bytes >
+ // max_posting_list_bytes()
+ // - RESOURCE_EXHAUSTED_ERROR if unable to grow the index to create a
+ // PostingListUsed of the requested size.
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder> AllocatePostingList(
+ uint32_t min_posting_list_bytes);
+
+ // Allocates a new IndexBlock with a single max-sized PostingListUsed. This
+ // chains index blocks by setting the next_block_index field of this new
+ // block's header to be prev_block_index and returns a PostingListHolder
+ // containing a max-sized PostingListUsed.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListHolder containing the
+ // requested PostingListUsed.
+ // - RESOURCE_EXHAUSTED_ERROR if unable to grow the index to create a
+ // PostingListUsed of max size
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder>
+ AllocateAndChainMaxSizePostingList(uint32_t prev_block_index);
+
+ // Frees the PostingListUsed that this holder holds.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status FreePostingList(PostingListHolder&& holder);
+
+ // Writes back the PostingListUsed that this holder holds to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status WritePostingListToDisk(
+ const PostingListHolder& holder);
+
+ // Discards all existing data by deleting the existing file and
+ // re-initializing a new one.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to delete existing files or initialize a new
+ // file with header
+ libtextclassifier3::Status Reset();
+
+ // Used to track the largest docid indexed in the index.
+ DocumentId get_last_indexed_docid() const {
+ return header_block_->header()->last_indexed_docid;
+ }
+ void set_last_indexed_docid(DocumentId docid) {
+ header_block_->header()->last_indexed_docid = docid;
+ }
+
+ // Updates the header and persists all changes to the index to disk. Returns
+ // true on success.
+ bool PersistToDisk();
+
+ // Returns the size of the index file in bytes.
+ int64_t GetDiskUsage() const {
+ return filesystem_->GetDiskUsage(storage_sfd_.get());
+ }
+
+  // Returns the size of the index file used to contain data.
+ uint64_t GetElementsSize() const {
+ // Element size is the same as disk size excluding the header block.
+ return GetDiskUsage() - block_size();
+ }
+
+ int num_blocks() const { return num_blocks_; }
+
+ // Gets the byte size of max sized posting list.
+ uint32_t max_posting_list_bytes() const {
+ return IndexBlock::CalculateMaxPostingListBytes(
+ block_size(), serializer_->GetDataTypeBytes());
+ }
+
+ // Info about the index based on the block size.
+ int block_size() const { return header_block_->header()->block_size; }
+
+ // Num blocks starts at 1 since the first block is the header.
+ bool empty() const { return num_blocks_ <= 1; }
+
+ // The percentage of the maximum index size that is free. Allocated blocks are
+ // treated as fully used, even if they are only partially used. In this way,
+ // min_free_fraction is a lower bound of available space.
+ double min_free_fraction() const {
+ return 1.0 - static_cast<double>(num_blocks_) / kMaxBlockIndex;
+ }
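+  // For example (illustrative): if num_blocks_ were one quarter of
+  // kMaxBlockIndex, this would return 1.0 - 0.25 = 0.75.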
+
+ const PostingListSerializer* serializer() const { return serializer_; }
+ PostingListSerializer* serializer() { return serializer_; }
+
+ // TODO(b/222349894) Convert the string output to a protocol buffer instead.
+ void GetDebugInfo(DebugInfoVerbosity::Code verbosity, std::string* out) const;
+
+ private:
+ explicit FlashIndexStorage(const Filesystem* filesystem,
+ std::string&& index_filename,
+ PostingListSerializer* serializer,
+ bool has_in_memory_freelists)
+ : filesystem_(filesystem),
+ index_filename_(std::move(index_filename)),
+ serializer_(serializer),
+ num_blocks_(0),
+ has_in_memory_freelists_(has_in_memory_freelists) {}
+
+ // Init the index from persistence. Create if file does not exist. We do not
+ // erase corrupt files.
+ //
+ // Returns false if unable to create a new header or if the existing one is
+ // corrupt.
+ bool Init();
+
+ // Create or open the header block. Returns true on success.
+ bool InitHeader();
+
+ // Create a new header block for an empty index file.
+ bool CreateHeader();
+
+ // Loads the header stored at the beginning of the index file and validates
+ // the values stored in it.
+ bool OpenHeader(int64_t file_size);
+
+ // Adds the IndexBlock referred to by block_index in the on-disk free list
+ // with index block_info_index.
+ void AddToOnDiskFreeList(uint32_t block_index, int block_info_index,
+ IndexBlock* index_block);
+
+ // Removes the IndexBlock referred to by block_index from the Header free list
+ // with index block_info_index.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status RemoveFromOnDiskFreeList(uint32_t block_index,
+ int block_info_index,
+ IndexBlock* index_block);
+
+ // RETURNS:
+ // - On success, a valid PostingListHolder created from the first entry of
+ // the in-memory freelist at block_info_index
+ // - OUT_OF_RANGE_ERROR if in_memory_freelists_ contains
+ // PostingListIdentifier with block_index >= num_blocks_
+ // - NOT_FOUND_ERROR if there was no entry in the freelist
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder>
+ GetPostingListFromInMemoryFreeList(int block_info_index);
+
+ // RETURNS:
+ // - On success, a valid PostingListHolder created from the first entry of
+ // the on-disk freelist at block_info_index
+ // - OUT_OF_RANGE_ERROR if header()->index_block_infos[block_info_index]
+ // contains block_index >= num_blocks_
+ // - NOT_FOUND_ERROR if there was no entry in the freelist
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder>
+ GetPostingListFromOnDiskFreeList(int block_info_index);
+
+ // Returns:
+ // - On success, a valid PostingListHolder created from a newly allocated
+ // IndexBlock.
+ // - RESOURCE_EXHAUSTED if the index couldn't be grown to fit a new
+ // IndexBlock.
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<PostingListHolder> AllocateNewPostingList(
+ int block_info_index);
+
+ // Returns:
+ // - On success, a newly created IndexBlock at block_index with posting
+ // lists of size posting_list_size
+ // - OUT_OF_RANGE_ERROR if block_index >= num_blocks_
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<IndexBlock> CreateIndexBlock(
+ uint32_t block_index, uint32_t posting_list_size) const;
+
+ // Returns:
+ // - On success, the IndexBlock that exists at block_index
+ // - OUT_OF_RANGE_ERROR if block_index >= num_blocks_
+ // - Any IndexBlock errors
+ libtextclassifier3::StatusOr<IndexBlock> GetIndexBlock(
+ uint32_t block_index) const;
+
+ // Add a new block to the end of the file and return its block
+ // index. Returns kInvalidBlockIndex if unable to grow the index file.
+ int GrowIndex();
+
+ // Return the index into index_block_infos of the smallest posting_list free
+ // list that can fit posting_list_bytes or -1 if posting_list_bytes exceeds
+ // the max-sized posting list.
+ int FindBestIndexBlockInfo(uint32_t posting_list_bytes) const;
+
+ // Flushes the in-memory free list to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - Any IndexBlock errors
+ libtextclassifier3::Status FlushInMemoryFreeList();
+
+ const Filesystem* filesystem_; // not owned; can't be null
+ std::string index_filename_;
+
+ PostingListSerializer* serializer_; // not owned; can't be null
+
+ // We open the index file into this fd.
+ ScopedFd storage_sfd_;
+
+ int num_blocks_; // can be inferred from index file size
+
+ std::unique_ptr<HeaderBlock> header_block_;
+
+ // In-memory cache of free posting lists.
+ struct FreeList {
+    // Experimentally determined that the high watermark for the largest
+    // freelist was ~3500.
+ static constexpr size_t kMaxSize = 4096;
+
+ // Push a new PostingListIdentifier if there is space.
+ void Push(PostingListIdentifier id);
+
+ // Attempt to pop a PostingListIdentifier.
+ //
+ // RETURNS:
+ // - identifier of a free posting list, on success
+ // - NOT_FOUND if there are no free posting lists on this free list.
+ libtextclassifier3::StatusOr<PostingListIdentifier> TryPop();
+
+ std::string DebugString() const;
+
+ private:
+ std::vector<PostingListIdentifier> free_list_;
+ int free_list_size_high_watermark_ = 0;
+ int num_dropped_free_list_entries_ = 0;
+ };
+ std::vector<FreeList> in_memory_freelists_;
+
+ bool has_in_memory_freelists_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_FLASH_INDEX_STORAGE_H_
diff --git a/icing/file/posting_list/flash-index-storage_test.cc b/icing/file/posting_list/flash-index-storage_test.cc
new file mode 100644
index 0000000..ef60037
--- /dev/null
+++ b/icing/file/posting_list/flash-index-storage_test.cc
@@ -0,0 +1,610 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/flash-index-storage.h"
+
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdlib>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Ne;
+using ::testing::Not;
+
+class FlashIndexStorageTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/test_dir";
+ file_name_ = test_dir_ + "/test_file.idx.index";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+ // TODO(b/249829533): test different serializers
+ serializer_ = std::make_unique<PostingListHitSerializer>();
+ }
+
+ void TearDown() override {
+ serializer_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ protected:
+ std::string test_dir_;
+ std::string file_name_;
+ Filesystem filesystem_;
+ std::unique_ptr<PostingListHitSerializer> serializer_;
+};
+
+TEST_F(FlashIndexStorageTest, ReadHeaderMagic) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ }
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ IsOkAndHolds(HeaderBlock::Header::kMagic));
+}
+
+TEST_F(FlashIndexStorageTest, ReadHeaderMagicOldVersion) {
+ int block_size;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ block_size = flash_index_storage.block_size();
+ }
+
+ int old_magic = 0x6dfba6ae;
+ ASSERT_THAT(old_magic, Ne(HeaderBlock::Header::kMagic));
+ {
+ // Manually modify the header magic.
+ ScopedFd sfd(filesystem_.OpenForWrite(file_name_.c_str()));
+ ASSERT_THAT(sfd.is_valid(), IsTrue());
+
+ // Read and validate header.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ HeaderBlock header_block,
+ HeaderBlock::Read(&filesystem_, sfd.get(), block_size));
+ header_block.header()->magic = old_magic;
+ ASSERT_THAT(header_block.Write(sfd.get()), IsTrue());
+ }
+
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ IsOkAndHolds(old_magic));
+}
+
+TEST_F(FlashIndexStorageTest,
+ ReadHeaderMagicNonExistingFileShouldGetNotFoundError) {
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(FlashIndexStorageTest, CorruptHeader) {
+ {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ }
+ {
+ // Read the valid header - should pass
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ }
+ {
+ // Corrupt the header file by changing pl_bytes
+ ScopedFd sfd(filesystem_.OpenForWrite(file_name_.c_str()));
+ off_t offset = 16;
+ uint32_t pl_bytes = sizeof(Hit) - 1; // This is intentionally invalid
+ filesystem_.PWrite(sfd.get(), offset, &pl_bytes, sizeof(uint32_t));
+ }
+ {
+    // Read the header file - should fail because pl_bytes is not divisible
+    // by sizeof(Hit), which is 5 as of this writing
+ ASSERT_THAT(
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+ {
+ // Correct the pl_bytes header alignment
+ ScopedFd sfd(filesystem_.OpenForWrite(file_name_.c_str()));
+ off_t offset = 16;
+ uint32_t pl_bytes = 2 * sizeof(Hit); // Should be valid
+ filesystem_.PWrite(sfd.get(), offset, &pl_bytes, sizeof(uint32_t));
+ }
+ {
+ // Read the valid header - should pass
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ }
+
+ // Delete the file
+ filesystem_.DeleteFile(file_name_.c_str());
+}
+
+TEST_F(FlashIndexStorageTest, EmptyStorage) {
+ {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ // An 'empty' FlashIndexStorage should have:
+ // 1. One block allocated for the header
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(1));
+ EXPECT_THAT(flash_index_storage.empty(), IsTrue());
+ // 2. The invalid DocumentId stored in its header
+ EXPECT_THAT(flash_index_storage.get_last_indexed_docid(),
+ Eq(kInvalidDocumentId));
+    // 3. Its disk usage should be the equivalent of one block.
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(flash_index_storage.block_size()));
+ }
+ {
+ // Read the valid header. All functions should return the same values.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(1));
+ EXPECT_THAT(flash_index_storage.empty(), IsTrue());
+ EXPECT_THAT(flash_index_storage.get_last_indexed_docid(),
+ Eq(kInvalidDocumentId));
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(flash_index_storage.block_size()));
+ }
+}
+
+TEST_F(FlashIndexStorageTest, FreeListInMemory) {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ {
+ // 1. Request a PL that is 1/2 block size. Remember that block size also
+ // includes the BlockHeader. The BlockHeader isn't publicly visible, so we
+ // subtract 100 bytes to be sure. AllocatePostingList will round up from
+ // kHalfBlockPostingListSize to whatever the correct size is.
+ const int kHalfBlockPostingListSize =
+ (flash_index_storage.block_size() - 100) / 2;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder1,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ PostingListIdentifier id1 = posting_list_holder1.id;
+ EXPECT_THAT(id1.is_valid(), IsTrue());
+ // 2. The index file should have grown by exactly one flash block.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits1 = {
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder1.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+
+ // 2. Get another PL. This should be on the same flash block. There should
+ // be no allocation.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder2,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder2.id.is_valid(), IsTrue());
+ // 2. The index file should not have grown.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits2 = {
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+ for (const Hit& hit : hits2) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder2.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+
+ // 3. Now, free the first posting list. This should add it to the free list
+ ICING_ASSERT_OK(
+ flash_index_storage.FreePostingList(std::move(posting_list_holder1)));
+
+ // 4. Request another posting list. This should NOT grow the index because
+ // the first posting list is free.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder3,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder3.id.is_valid(), IsTrue());
+ // 2. The index file should not have grown.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+ // 3. The returned posting list holder should have the same id as the
+ // first posting list holder.
+ EXPECT_THAT(posting_list_holder3.id.posting_list_index(),
+ Eq(id1.posting_list_index()));
+ EXPECT_THAT(posting_list_holder3.id.block_index(), Eq(id1.block_index()));
+ // Make sure this pl is empty. The hits that used to be there should be
+ // gone.
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
+ IsOkAndHolds(IsEmpty()));
+ std::vector<Hit> hits3 = {
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
+ for (const Hit& hit : hits3) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder3.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits3.rbegin(), hits3.rend())));
+ }
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(2 * flash_index_storage.block_size()));
+}
+
+TEST_F(FlashIndexStorageTest, FreeListNotInMemory) {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get(),
+ /*in_memory=*/false));
+
+ {
+ // 1. Request a PL that is 1/2 block size. Remember that block size also
+ // includes the BlockHeader. The BlockHeader isn't publicly visible, so we
+ // subtract 100 bytes to be sure. AllocatePostingList will round up from
+ // kHalfBlockPostingListSize to whatever the correct size is.
+ const int kHalfBlockPostingListSize =
+ (flash_index_storage.block_size() - 100) / 2;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder1,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ PostingListIdentifier id1 = posting_list_holder1.id;
+ EXPECT_THAT(id1.is_valid(), IsTrue());
+ // 2. The index file should have grown by exactly one flash block.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits1 = {
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder1.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+
+ // 2. Get another PL. This should be on the same flash block. There should
+ // be no allocation.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder2,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder2.id.is_valid(), IsTrue());
+ // 2. The index file should not have grown.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits2 = {
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+ for (const Hit& hit : hits2) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder2.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+
+ // 3. Now, free the first posting list. This should add it to the free list
+ ICING_ASSERT_OK(
+ flash_index_storage.FreePostingList(std::move(posting_list_holder1)));
+
+ // 4. Request another posting list. This should NOT grow the index because
+ // the first posting list is free.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder3,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder3.id.is_valid(), IsTrue());
+ // 2. The index file should not have grown.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+ // 3. The returned posting list holder should have the same id as the
+ // first posting list holder.
+ EXPECT_THAT(posting_list_holder3.id.posting_list_index(),
+ Eq(id1.posting_list_index()));
+ EXPECT_THAT(posting_list_holder3.id.block_index(), Eq(id1.block_index()));
+ // Make sure this pl is empty. The hits that used to be there should be
+ // gone.
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
+ IsOkAndHolds(IsEmpty()));
+ std::vector<Hit> hits3 = {
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
+ for (const Hit& hit : hits3) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder3.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits3.rbegin(), hits3.rend())));
+ }
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(2 * flash_index_storage.block_size()));
+}
+
+TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) {
+ PostingListIdentifier id1 = PostingListIdentifier::kInvalid;
+ int half_block_posting_list_size = 0;
+ {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+
+ {
+ // 1. Request a PL that is 1/2 block size. Remember that block size also
+ // includes the BlockHeader. The BlockHeader isn't publicly visible, so we
+ // subtract 100 bytes to be sure. AllocatePostingList will round up from
+      // half_block_posting_list_size to whatever the correct size is.
+ half_block_posting_list_size =
+ (flash_index_storage.block_size() - 100) / 2;
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder posting_list_holder1,
+ flash_index_storage.AllocatePostingList(
+ half_block_posting_list_size));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ id1 = posting_list_holder1.id;
+ EXPECT_THAT(id1.is_valid(), IsTrue());
+ // 2. The index file should have grown by exactly one flash block.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits1 = {
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder1.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+
+ // 2. Get another PL. This should be on the same flash block. There should
+ // be no allocation.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder posting_list_holder2,
+ flash_index_storage.AllocatePostingList(
+ half_block_posting_list_size));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder2.id.is_valid(), IsTrue());
+ // 2. The index file should not have grown.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits2 = {
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+ for (const Hit& hit : hits2) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder2.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+
+ // 3. Now, free the first posting list. This should add it to the free
+ // list
+ ICING_ASSERT_OK(
+ flash_index_storage.FreePostingList(std::move(posting_list_holder1)));
+ }
+
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(2 * flash_index_storage.block_size()));
+    // 4. The FlashIndexStorage should go out of scope and flush the in-memory
+    // free list to disk.
+ }
+
+ {
+ // Recreate the flash index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+
+ {
+ // 5. Request another posting list. This should NOT grow the index because
+ // the first posting list is free.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder posting_list_holder3,
+ flash_index_storage.AllocatePostingList(
+ half_block_posting_list_size));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder3.id.is_valid(), IsTrue());
+ // 2. The index file should not have grown.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+ // 3. The returned posting list holder should have the same id as the
+ // first posting list holder.
+ EXPECT_THAT(posting_list_holder3.id.posting_list_index(),
+ Eq(id1.posting_list_index()));
+ EXPECT_THAT(posting_list_holder3.id.block_index(), Eq(id1.block_index()));
+ // Make sure this pl is empty. The hits that used to be there should be
+ // gone.
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
+ IsOkAndHolds(IsEmpty()));
+ std::vector<Hit> hits3 = {
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
+ for (const Hit& hit : hits3) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder3.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits3.rbegin(), hits3.rend())));
+ }
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(2 * flash_index_storage.block_size()));
+ }
+}
+
+TEST_F(FlashIndexStorageTest, DifferentSizedPostingLists) {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ {
+ // 1. Request a PL that is 1/2 block size. Remember that block size also
+ // includes the BlockHeader. The BlockHeader isn't publicly visible, so we
+ // subtract 100 bytes to be sure. AllocatePostingList will round up from
+ // kHalfBlockPostingListSize to whatever the correct size is.
+ const int kHalfBlockPostingListSize =
+ (flash_index_storage.block_size() - 100) / 2;
+ const int kQuarterBlockPostingListSize =
+ (flash_index_storage.block_size() - 100) / 4;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder1,
+ flash_index_storage.AllocatePostingList(kHalfBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ PostingListIdentifier id1 = posting_list_holder1.id;
+ EXPECT_THAT(id1.is_valid(), IsTrue());
+ // 2. The index file should have grown by exactly one flash block.
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(2));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits1 = {
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder1.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+
+ // 2. Get a PL that is 1/4 block size. Even though a 1/4 block PL could
+ // theoretically fit in the same block, we'll allocate a new one because PLs
+ // on a block are required to be the same size.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder2,
+ flash_index_storage.AllocatePostingList(kQuarterBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder2.id.is_valid(), IsTrue());
+ // 2. The index file should have grown by one block.
+ EXPECT_THAT(posting_list_holder2.id.block_index(),
+ Not(Eq(id1.block_index())));
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(3));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+
+ std::vector<Hit> hits2 = {
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+ for (const Hit& hit : hits2) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&posting_list_holder2.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+
+ // 3. Request another 1/4 block-size posting list. This should NOT grow the
+ // index because there should be three free posting lists on block2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder posting_list_holder3,
+ flash_index_storage.AllocatePostingList(kQuarterBlockPostingListSize));
+ // We expect:
+ // 1. FlashIndexStorage will return a valid id.
+ EXPECT_THAT(posting_list_holder3.id.is_valid(), IsTrue());
+ // 2. The index file should have remained the same size as before and the
+ // third posting list holder should use the same block as the second
+ // posting list holder.
+ EXPECT_THAT(posting_list_holder3.id.block_index(),
+ Eq(posting_list_holder2.id.block_index()));
+ EXPECT_THAT(flash_index_storage.num_blocks(), Eq(3));
+ EXPECT_THAT(flash_index_storage.empty(), IsFalse());
+ }
+ EXPECT_THAT(flash_index_storage.GetDiskUsage(),
+ Eq(3 * flash_index_storage.block_size()));
+}
+
+TEST_F(FlashIndexStorageTest, AllocateTooLargePostingList) {
+ // Create the header file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+
+ // Request a PL that is 2x block size.
+ const int kDoubleBlockSize = flash_index_storage.block_size() * 2;
+ EXPECT_THAT(flash_index_storage.AllocatePostingList(kDoubleBlockSize),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/index-block.cc b/icing/file/posting_list/index-block.cc
new file mode 100644
index 0000000..3fa397c
--- /dev/null
+++ b/icing/file/posting_list/index-block.cc
@@ -0,0 +1,333 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/index-block.h"
+
+#include <sys/types.h>
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-free.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/file/posting_list/posting-list-utils.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+libtextclassifier3::Status ValidatePostingListBytes(
+ PostingListSerializer* serializer, uint32_t posting_list_bytes,
+ uint32_t block_size) {
+ if (posting_list_bytes > IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer->GetDataTypeBytes()) ||
+ !posting_list_utils::IsValidPostingListSize(
+ posting_list_bytes, serializer->GetDataTypeBytes(),
+ serializer->GetMinPostingListSize())) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Requested posting list size %d is illegal for a flash block with max "
+ "posting list size of %d",
+ posting_list_bytes,
+ IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer->GetDataTypeBytes())));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<IndexBlock>
+IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size) {
+ if (block_size < sizeof(BlockHeader)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Provided block_size %d is too small to fit even the BlockHeader!",
+ block_size));
+ }
+
+ BlockHeader header;
+ if (!filesystem->PRead(fd, &header, sizeof(BlockHeader), block_file_offset)) {
+ return absl_ports::InternalError("PRead block header error");
+ }
+
+ ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
+ serializer, header.posting_list_bytes, block_size));
+
+ return IndexBlock(filesystem, serializer, fd, block_file_offset, block_size,
+ header.posting_list_bytes);
+}
+
+/* static */ libtextclassifier3::StatusOr<IndexBlock>
+IndexBlock::CreateFromUninitializedRegion(const Filesystem* filesystem,
+ PostingListSerializer* serializer,
+ int fd, off_t block_file_offset,
+ uint32_t block_size,
+ uint32_t posting_list_bytes) {
+ if (block_size < sizeof(BlockHeader)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Provided block_size %d is too small to fit even the BlockHeader!",
+ block_size));
+ }
+
+ ICING_RETURN_IF_ERROR(
+ ValidatePostingListBytes(serializer, posting_list_bytes, block_size));
+ IndexBlock block(filesystem, serializer, fd, block_file_offset, block_size,
+ posting_list_bytes);
+ ICING_RETURN_IF_ERROR(block.Reset());
+
+ return block;
+}
+
+libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
+IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot get posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ // Read out the header from disk.
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+
+ // Read out the allocated posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
+ serializer_, std::move(posting_list_buffer), posting_list_bytes_));
+ return PostingListAndBlockInfo(
+ std::move(pl_used), posting_list_index, header.next_block_index,
+ /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
+ kInvalidPostingListIndex);
+}
+
+libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
+IndexBlock::AllocatePostingList() {
+ // Read out the header from disk.
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+
+ if (header.free_list_posting_list_index == kInvalidPostingListIndex) {
+ return absl_ports::ResourceExhaustedError(
+ "No available posting lists to allocate.");
+ }
+
+ // Pull one off the free list.
+ PostingListIndex posting_list_index = header.free_list_posting_list_index;
+
+ // Read out the posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+  // Step 1: get the next (chained) free posting list index and store it in the
+  // block header.
+ ICING_ASSIGN_OR_RETURN(
+ PostingListFree pl_free,
+ PostingListFree::CreateFromPreexistingPostingListFreeRegion(
+ posting_list_buffer.get(), posting_list_bytes_,
+ serializer_->GetDataTypeBytes(),
+ serializer_->GetMinPostingListSize()));
+ header.free_list_posting_list_index = pl_free.get_next_posting_list_index();
+ if (header.free_list_posting_list_index != kInvalidPostingListIndex &&
+ header.free_list_posting_list_index >= max_num_posting_lists()) {
+ ICING_LOG(ERROR)
+ << "Free Posting List points to an invalid posting list index!";
+ header.free_list_posting_list_index = kInvalidPostingListIndex;
+ }
+
+ // Step 2: create PostingListUsed instance. The original content in the above
+ // posting_list_buffer is not important now because
+ // PostingListUsed::CreateFromUnitializedRegion will wipe it out, and
+ // we only need to sync it to disk after initializing.
+ ICING_ASSIGN_OR_RETURN(PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer_, posting_list_bytes_));
+
+ // Sync the initialized posting list (overwrite the original content of
+ // PostingListFree) and header to disk.
+ ICING_RETURN_IF_ERROR(
+ WritePostingList(posting_list_index, pl_used.posting_list_buffer()));
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return PostingListAndBlockInfo(
+ std::move(pl_used), posting_list_index, header.next_block_index,
+ /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
+ kInvalidPostingListIndex);
+}
+
+libtextclassifier3::Status IndexBlock::FreePostingList(
+ PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot free posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::WritePostingListToDisk(
+ const PostingListUsed& posting_list_used,
+ PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot write posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+
+ if (posting_list_used.size_in_bytes() != posting_list_bytes_) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot write posting list into a block with different posting list "
+ "bytes");
+ }
+
+ if (!posting_list_used.is_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Write the allocated posting list to disk.
+ return WritePostingList(posting_list_index,
+ posting_list_used.posting_list_buffer());
+}
+
+libtextclassifier3::StatusOr<uint32_t> IndexBlock::GetNextBlockIndex() const {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ return header.next_block_index;
+}
+
+libtextclassifier3::Status IndexBlock::SetNextBlockIndex(
+ uint32_t next_block_index) {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ header.next_block_index = next_block_index;
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<bool> IndexBlock::HasFreePostingLists() const {
+ ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
+ return header.free_list_posting_list_index != kInvalidPostingListIndex;
+}
+
+libtextclassifier3::Status IndexBlock::Reset() {
+ BlockHeader header;
+ header.free_list_posting_list_index = kInvalidPostingListIndex;
+ header.next_block_index = kInvalidBlockIndex;
+ header.posting_list_bytes = posting_list_bytes_;
+
+ // Starting with the last posting list, prepend each posting list to the free
+ // list. At the end, the beginning of the free list should be the first
+ // posting list.
+ for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
+ posting_list_index >= 0; --posting_list_index) {
+ // Adding the posting list at posting_list_index to the free list will
+ // modify both the posting list and also
+ // header.free_list_posting_list_index.
+ ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
+ }
+
+ // Sync the header to disk.
+ ICING_RETURN_IF_ERROR(WriteHeader(header));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::FreePostingListImpl(
+ BlockHeader& header, PostingListIndex posting_list_index) {
+ // Read out the posting list from disk.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ ReadPostingList(posting_list_index));
+
+ ICING_ASSIGN_OR_RETURN(PostingListFree plfree,
+ PostingListFree::CreateFromUnitializedRegion(
+ posting_list_buffer.get(), posting_list_bytes(),
+ serializer_->GetDataTypeBytes(),
+ serializer_->GetMinPostingListSize()));
+
+ // Put at the head of the list.
+ plfree.set_next_posting_list_index(header.free_list_posting_list_index);
+ header.free_list_posting_list_index = posting_list_index;
+
+ // Sync the posting list to disk.
+ ICING_RETURN_IF_ERROR(
+ WritePostingList(posting_list_index, posting_list_buffer.get()));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<IndexBlock::BlockHeader> IndexBlock::ReadHeader()
+ const {
+ BlockHeader header;
+ if (!filesystem_->PRead(fd_, &header, sizeof(BlockHeader),
+ block_file_offset_)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PRead block header error: ", strerror(errno)));
+ }
+ if (header.posting_list_bytes != posting_list_bytes_) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent posting list bytes between block header (%d) and class "
+ "instance (%d)",
+ header.posting_list_bytes, posting_list_bytes_));
+ }
+ return header;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>>
+IndexBlock::ReadPostingList(PostingListIndex posting_list_index) const {
+ auto posting_list_buffer = std::make_unique<uint8_t[]>(posting_list_bytes_);
+ if (!filesystem_->PRead(fd_, posting_list_buffer.get(), posting_list_bytes_,
+ get_posting_list_file_offset(posting_list_index))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PRead posting list error: ", strerror(errno)));
+ }
+ return posting_list_buffer;
+}
+
+libtextclassifier3::Status IndexBlock::WriteHeader(const BlockHeader& header) {
+ if (!filesystem_->PWrite(fd_, block_file_offset_, &header,
+ sizeof(BlockHeader))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PWrite block header error: ", strerror(errno)));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IndexBlock::WritePostingList(
+ PostingListIndex posting_list_index, const uint8_t* posting_list_buffer) {
+ if (!filesystem_->PWrite(fd_,
+ get_posting_list_file_offset(posting_list_index),
+ posting_list_buffer, posting_list_bytes_)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("PWrite posting list error: ", strerror(errno)));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/index-block.h b/icing/file/posting_list/index-block.h
new file mode 100644
index 0000000..21ad13f
--- /dev/null
+++ b/icing/file/posting_list/index-block.h
@@ -0,0 +1,369 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_
+#define ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_
+
+#include <sys/types.h>
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/legacy/index/icing-bit-util.h"
+
+namespace icing {
+namespace lib {
+
+// This class is used to manage I/O to a single flash block and to manage the
+// division of that flash block into PostingLists. It provides an interface to
+// allocate, free and read posting lists. Note that IndexBlock is stateless:
+// - Any changes to block header will be synced to disk before the method
+// returns.
+// - Any posting list allocation/freeing will be synced to disk before the
+// method returns.
+// - When getting an allocated posting list, it PReads the contents from disk
+//   into a buffer and transfers ownership of the buffer to PostingListUsed.
+//   Any changes to PostingListUsed will not be visible to other instances
+//   until WritePostingListToDisk is called.
+//
+// An IndexBlock contains a small header and an array of fixed-size posting list
+// buffers. Initially, all posting lists are chained in a singly-linked free
+// list.
+//
+// When we want to get a new PostingList from an IndexBlock, we just pull one
+// off the free list. When the user wants to return the PostingList to the free
+// pool, we prepend it to the free list.
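+//
+// An illustrative layout (sizes and indices are examples only):
+//
+//   [BlockHeader][PL 0][PL 1][PL 2] ... [PL n-1]
+//
+// where BlockHeader.free_list_posting_list_index holds the head of the free
+// list (say PL 2), and each free posting list stores the index of the next
+// free one, interpreted via PostingListFree.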
+//
+// Reading/writing the same block is NOT thread safe. If we read/write the
+// same block at the same time (whether via the same or different IndexBlock
+// instances), it causes a race condition and the behavior is undefined.
+class IndexBlock {
+ public:
+  // Returns the maximum posting list size in bytes that can be stored in this
+  // block.
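+  //
+  // E.g. (illustrative numbers): with a 4096-byte block, a 12-byte
+  // BlockHeader, and 5-byte data (sizeof(Hit) as of this writing), this yields
+  // (4096 - 12) / 5 * 5 = 4080 bytes, i.e. the largest multiple of the data
+  // size that fits after the header.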
+ static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes,
+ uint32_t data_type_bytes) {
+ return (block_size_in_bytes - sizeof(BlockHeader)) / data_type_bytes *
+ data_type_bytes;
+ }
+
+ // Creates an IndexBlock to reference the previously used region of the file
+ // descriptor starting at block_file_offset with size block_size.
+ //
+  // - serializer: used for reading/writing posting lists. It also provides
+  //               some additional information (e.g. data size).
+ // - fd: a valid file descriptor opened for write by the caller.
+  // - block_file_offset: absolute offset within the file (fd).
+ // - block_size: byte size of this block.
+ //
+  // Unlike CreateFromUninitializedRegion, a pre-existing index block has
+  // already determined and written its posting list size into the block
+  // header, so it will be read from the block header and the caller doesn't
+  // have to provide it.
+ //
+ // RETURNS:
+ // - A valid IndexBlock instance on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If block_size is too small for even just the BlockHeader
+  //       - If the posting list size stored in the region is not a valid
+  //         posting list size (e.g. it exceeds CalculateMaxPostingListBytes())
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<IndexBlock>
+ CreateFromPreexistingIndexBlockRegion(const Filesystem* filesystem,
+ PostingListSerializer* serializer,
+ int fd, off_t block_file_offset,
+ uint32_t block_size);
+
+ // Creates an IndexBlock to reference an uninitialized region of the file
+ // descriptor starting at block_file_offset with size block_size. The
+ // IndexBlock will initialize the region to be an empty IndexBlock with
+ // posting lists of size posting_list_bytes.
+ //
+  // - serializer: used for reading/writing posting lists. It also provides
+  //               some additional information (e.g. data size).
+ // - fd: a valid file descriptor opened for write by the caller.
+  // - block_file_offset: absolute offset within the file (fd).
+ // - block_size: byte size of this block.
+ // - posting_list_bytes: byte size of all posting lists in this block. This
+ // information will be written into block header.
+ //
+ // RETURNS:
+ // - A valid IndexBlock instance on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If block_size is too small for even just the BlockHeader
+  //       - If posting_list_bytes is not a valid posting list size (e.g. it
+  //         exceeds CalculateMaxPostingListBytes())
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion(
+ const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size,
+ uint32_t posting_list_bytes);
+
+ IndexBlock(const IndexBlock&) = delete;
+ IndexBlock& operator=(const IndexBlock&) = delete;
+ IndexBlock(IndexBlock&&) = default;
+ IndexBlock& operator=(IndexBlock&&) = default;
+
+ ~IndexBlock() = default;
+
+ struct PostingListAndBlockInfo {
+ PostingListUsed posting_list_used;
+ PostingListIndex posting_list_index;
+
+ uint32_t next_block_index;
+
+ // Flag indicating if there are any free posting lists available after this
+ // allocation request.
+ bool has_free_posting_lists;
+
+ explicit PostingListAndBlockInfo(PostingListUsed&& posting_list_used_in,
+ PostingListIndex posting_list_index_in,
+ uint32_t next_block_index_in,
+ bool has_free_posting_lists_in)
+ : posting_list_used(std::move(posting_list_used_in)),
+ posting_list_index(posting_list_index_in),
+ next_block_index(next_block_index_in),
+ has_free_posting_lists(has_free_posting_lists_in) {}
+ };
+
+ // PReads existing posting list content at posting_list_index, instantiates a
+ // PostingListUsed, and returns it with some additional index block info.
+ //
+ // RETURNS:
+ // - A valid PostingListAndBlockInfo on success
+ // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
+ // >= max_num_posting_lists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<PostingListAndBlockInfo> GetAllocatedPostingList(
+ PostingListIndex posting_list_index);
+
+ // Allocates a PostingListUsed in the IndexBlock, initializes the content
+ // (by serializer), and returns the initialized PostingListUsed instance,
+ // PostingListIndex, and some additional index block info.
+ //
+ // RETURNS:
+ // - A valid PostingListAndBlockInfo instance on success
+ // - RESOURCE_EXHAUSTED_ERROR if there is already no free posting list
+ // available, i.e. !HasFreePostingLists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<PostingListAndBlockInfo> AllocatePostingList();
+
+ // Frees a posting list at posting_list_index, adds it into the free list
+ // chain and updates block header. Both changes on posting list free and
+ // header will be synced to disk.
+ //
+ // It is considered an error to "double-free" a posting list. You should never
+ // call FreePostingList(index) with the same index twice, unless that index
+ // was returned by an intervening AllocatePostingList() call.
+ //
+ // Ex.
+ // PostingListIndex index = block.AllocatePostingList();
+ // DoSomething(block.GetAllocatedPostingList(index));
+ // block.FreePostingList(index);
+ // block.FreePostingList(index); // Argh! What are you doing?!
+ // ...
+ // PostingListIndex index = block.AllocatePostingList();
+ // DoSomething(block.GetAllocatedPostingList(index));
+ // block.FreePostingList(index);
+ // index = block.AllocatePostingList();
+ // DoSomethingElse(block.GetAllocatedPostingList(index));
+ // // A-Ok! We called AllocatePostingList() since the last FreePostingList()
+ // // call.
+ // block.FreePostingList(index);
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
+ // >= max_num_posting_lists()
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status FreePostingList(
+ PostingListIndex posting_list_index);
+
+ // Writes back an allocated posting list (PostingListUsed) at
+ // posting_list_index to disk.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR
+ // - If posting_list_index < 0 or posting_list_index >=
+ // max_num_posting_lists()
+ // - If posting_list_used.size_in_bytes() != posting_list_bytes_
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WritePostingListToDisk(
+ const PostingListUsed& posting_list_used,
+ PostingListIndex posting_list_index);
+
+ // PReads to get the index of next block from block header. Blocks can be
+ // chained, and the interpretation of the chaining is up to the caller.
+ //
+ // RETURNS:
+ // - Next block index on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<uint32_t> GetNextBlockIndex() const;
+
+ // PWrites block header to set the index of next block.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status SetNextBlockIndex(uint32_t next_block_index);
+
+ // PReads to get whether or not there are available posting lists in the free
+ // list.
+ //
+ // RETURNS:
+ // - A bool value on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<bool> HasFreePostingLists() const;
+
+ // Retrieves the size (in bytes) of the posting lists in this IndexBlock.
+ uint32_t posting_list_bytes() const { return posting_list_bytes_; }
+
+  // Retrieves the maximum number of posting lists in the block.
+ uint32_t max_num_posting_lists() const {
+ return total_posting_lists_bytes() / posting_list_bytes_;
+ }
+
+  // Retrieves the number of bits required to store the largest
+  // PostingListIndex in this block.
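+  //
+  // E.g. (numbers from the unit test): a block holding 204 posting lists
+  // needs 8 bits to address indices 0..203; one holding 20 needs only 5 bits.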
+ int posting_list_index_bits() const {
+ return BitsToStore(max_num_posting_lists());
+ }
+
+ private:
+ struct BlockHeader {
+ // Index of the next block if this block is being chained or part of a free
+ // list.
+ uint32_t next_block_index;
+
+ // Index to the first PostingListFree in the IndexBlock. This is the start
+ // of the free list.
+ PostingListIndex free_list_posting_list_index;
+
+ // The size of each posting list in the IndexBlock. This value will be
+ // initialized when calling CreateFromUninitializedRegion once and remain
+ // unchanged.
+ uint32_t posting_list_bytes;
+ };
+
+ // Assumes that fd has been opened for write.
+ explicit IndexBlock(const Filesystem* filesystem,
+ PostingListSerializer* serializer, int fd,
+ off_t block_file_offset, uint32_t block_size_in_bytes,
+ uint32_t posting_list_bytes)
+ : filesystem_(filesystem),
+ serializer_(serializer),
+ fd_(fd),
+ block_file_offset_(block_file_offset),
+ block_size_in_bytes_(block_size_in_bytes),
+ posting_list_bytes_(posting_list_bytes) {}
+
+ // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
+ // all posting lists to the free list.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Reset();
+
+ // Frees a posting list at posting_list_index, adds it into the free list
+ // chain and updates (sets) the given block header instance.
+ //
+  // - This function exists to avoid redundant block header PWrites when
+  //   freeing multiple posting lists.
+ // - The caller should provide a BlockHeader instance for updating the free
+ // list chain, and finally sync it to disk.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists()
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status FreePostingListImpl(
+ BlockHeader& header, PostingListIndex posting_list_index);
+
+ // PReads block header from the file.
+ //
+ // RETURNS:
+ // - A BlockHeader instance on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<BlockHeader> ReadHeader() const;
+
+ // PReads posting list content at posting_list_index. Note that it can be a
+ // freed or allocated posting list.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists()
+ //
+ // RETURNS:
+ // - A data buffer with size = posting_list_bytes_ on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>> ReadPostingList(
+ PostingListIndex posting_list_index) const;
+
+ // PWrites block header to the file.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WriteHeader(const BlockHeader& header);
+
+ // PWrites posting list content at posting_list_index. Note that it can be a
+ // freed or allocated posting list.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists() and size of
+  //           posting_list_buffer is posting_list_bytes_.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status WritePostingList(
+ PostingListIndex posting_list_index, const uint8_t* posting_list_buffer);
+
+ // Retrieves the absolute file (fd) offset of a posting list at
+ // posting_list_index.
+ //
+  // REQUIRES: 0 <= posting_list_index < max_num_posting_lists()
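+  //
+  // E.g. (illustrative numbers): with block_file_offset_ = 4096, a 12-byte
+  // BlockHeader, and 20-byte posting lists, posting list 3 starts at
+  // 4096 + 12 + 20 * 3 = 4168.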
+ off_t get_posting_list_file_offset(
+ PostingListIndex posting_list_index) const {
+ return block_file_offset_ + sizeof(BlockHeader) +
+ posting_list_bytes_ * posting_list_index;
+ }
+
+  // Retrieves the byte size available in the block for posting lists
+  // (excluding the size of the block header).
+ uint32_t total_posting_lists_bytes() const {
+ return block_size_in_bytes_ - sizeof(BlockHeader);
+ }
+
+ const Filesystem* filesystem_; // Does not own.
+
+ PostingListSerializer* serializer_; // Does not own.
+
+ int fd_; // Does not own.
+
+ off_t block_file_offset_;
+ uint32_t block_size_in_bytes_;
+ uint32_t posting_list_bytes_;
+};
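+
+// Ex. overall usage (an illustrative sketch based on the unit tests; error
+// handling elided, and `filesystem`, `serializer` and `fd` are assumed to be
+// set up by the caller, with the file already grown by one 4096-byte block):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       IndexBlock block,
+//       IndexBlock::CreateFromUninitializedRegion(
+//           &filesystem, serializer, fd, /*block_file_offset=*/0,
+//           /*block_size=*/4096, /*posting_list_bytes=*/20));
+//   ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo info,
+//                          block.AllocatePostingList());
+//   // ... write data into info.posting_list_used via the serializer ...
+//   ICING_RETURN_IF_ERROR(block.WritePostingListToDisk(
+//       info.posting_list_used, info.posting_list_index));
+//   ICING_RETURN_IF_ERROR(block.FreePostingList(info.posting_list_index));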
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_
diff --git a/icing/file/posting_list/index-block_test.cc b/icing/file/posting_list/index-block_test.cc
new file mode 100644
index 0000000..ebc9ba4
--- /dev/null
+++ b/icing/file/posting_list/index-block_test.cc
@@ -0,0 +1,357 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/index-block.h"
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+static constexpr int kBlockSize = 4096;
+
+class IndexBlockTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/flash";
+ flash_file_ = test_dir_ + "/0";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+ sfd_ = std::make_unique<ScopedFd>(
+ filesystem_.OpenForWrite(flash_file_.c_str()));
+ ASSERT_TRUE(sfd_->is_valid());
+
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(filesystem_.Grow(sfd_->get(), kBlockSize));
+
+ // TODO: test different serializers
+ serializer_ = std::make_unique<PostingListHitSerializer>();
+ }
+
+ void TearDown() override {
+ serializer_.reset();
+ sfd_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string flash_file_;
+ std::unique_ptr<ScopedFd> sfd_;
+ std::unique_ptr<PostingListHitSerializer> serializer_;
+};
+
+TEST_F(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
+ constexpr int kPostingListBytes = 20;
+
+ {
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
+ }
+}
+
+TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
+ constexpr int kPostingListBytes1 = 20;
+
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
+ IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes1));
+ EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes1));
+ // There should be (4096 - 12) / 20 = 204 posting lists
+ // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 203 in only 8
+ // bits.
+ EXPECT_THAT(block.max_num_posting_lists(), Eq(204));
+ EXPECT_THAT(block.posting_list_index_bits(), Eq(8));
+
+ constexpr int kPostingListBytes2 = 200;
+
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ block, IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(), /*offset=*/0,
+ kBlockSize, kPostingListBytes2));
+ EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes2));
+ // There should be (4096 - 12) / 200 = 20 posting lists
+ // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5
+ // bits.
+ EXPECT_THAT(block.max_num_posting_lists(), Eq(20));
+ EXPECT_THAT(block.posting_list_index_bits(), Eq(5));
+}
+
+TEST_F(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
+ constexpr int kPostingListBytes = 2000;
+
+ std::vector<Hit> test_hits{
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
+ };
+ PostingListIndex allocated_index;
+ {
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
+ // Add hits to the first posting list.
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info,
+ block.AllocatePostingList());
+ for (const Hit& hit : test_hits) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info.posting_list_used, hit));
+ }
+ EXPECT_THAT(
+ serializer_->GetHits(&alloc_info.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
+
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info.posting_list_used, alloc_info.posting_list_index));
+ allocated_index = alloc_info.posting_list_index;
+ }
+ {
+ // Create an IndexBlock from the previously allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock::PostingListAndBlockInfo pl_block_info,
+ block.GetAllocatedPostingList(allocated_index));
+ EXPECT_THAT(
+ serializer_->GetHits(&pl_block_info.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
+ }
+}
+
+TEST_F(IndexBlockTest, IndexBlockMultiplePostingLists) {
+ constexpr int kPostingListBytes = 2000;
+
+ std::vector<Hit> hits_in_posting_list1{
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
+ };
+ std::vector<Hit> hits_in_posting_list2{
+ Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
+ };
+ PostingListIndex allocated_index_1;
+ PostingListIndex allocated_index_2;
+ {
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
+
+ // Add hits to the first posting list.
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
+ block.AllocatePostingList());
+ for (const Hit& hit : hits_in_posting_list1) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
+ hits_in_posting_list1.rend())));
+
+ // Add hits to the second posting list.
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
+ block.AllocatePostingList());
+ for (const Hit& hit : hits_in_posting_list2) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
+ hits_in_posting_list2.rend())));
+
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
+
+ // Write both posting lists to disk.
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info_1.posting_list_used, alloc_info_1.posting_list_index));
+ ICING_ASSERT_OK(block.WritePostingListToDisk(
+ alloc_info_2.posting_list_used, alloc_info_2.posting_list_index));
+ allocated_index_1 = alloc_info_1.posting_list_index;
+ allocated_index_2 = alloc_info_2.posting_list_index;
+ }
+ {
+ // Create an IndexBlock from the previously allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock::PostingListAndBlockInfo pl_block_info_1,
+ block.GetAllocatedPostingList(allocated_index_1));
+ EXPECT_THAT(serializer_->GetHits(&pl_block_info_1.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
+ hits_in_posting_list1.rend())));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock::PostingListAndBlockInfo pl_block_info_2,
+ block.GetAllocatedPostingList(allocated_index_2));
+ EXPECT_THAT(serializer_->GetHits(&pl_block_info_2.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
+ hits_in_posting_list2.rend())));
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
+ }
+}
+
+TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
+ constexpr int kPostingListBytes = 2000;
+
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
+ IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
+
+ // Add hits to the first posting list.
+ std::vector<Hit> hits_in_posting_list1{
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
+ };
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
+ block.AllocatePostingList());
+ for (const Hit& hit : hits_in_posting_list1) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
+ hits_in_posting_list1.rend())));
+
+ // Add hits to the second posting list.
+ std::vector<Hit> hits_in_posting_list2{
+ Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
+ };
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
+ block.AllocatePostingList());
+ for (const Hit& hit : hits_in_posting_list2) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
+ hits_in_posting_list2.rend())));
+
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
+
+ // Now free the first posting list. Then, reallocate it and fill it with a
+ // different set of hits.
+ ICING_ASSERT_OK(block.FreePostingList(alloc_info_1.posting_list_index));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
+
+ std::vector<Hit> hits_in_posting_list3{
+ Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2),
+ };
+ ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_3,
+ block.AllocatePostingList());
+  // The reallocated posting list should reuse the index freed above.
+  EXPECT_THAT(alloc_info_3.posting_list_index,
+              Eq(alloc_info_1.posting_list_index));
+ for (const Hit& hit : hits_in_posting_list3) {
+ ICING_ASSERT_OK(
+ serializer_->PrependHit(&alloc_info_3.posting_list_used, hit));
+ }
+ EXPECT_THAT(serializer_->GetHits(&alloc_info_3.posting_list_used),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list3.rbegin(),
+ hits_in_posting_list3.rend())));
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
+}
+
+TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
+ constexpr int kPostingListBytes = 2000;
+ constexpr int kSomeBlockIndex = 22;
+
+ {
+ // Create an IndexBlock from this newly allocated file block and set the
+ // next block index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
+ EXPECT_THAT(block.SetNextBlockIndex(kSomeBlockIndex), IsOk());
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
+ }
+ {
+ // Create an IndexBlock from this previously allocated file block and make
+ // sure that next_block_index is still set properly.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
+ }
+ {
+    // Create an IndexBlock, treating this file block as uninitialized. This
+    // resets the next_block_index to kInvalidBlockIndex.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ &filesystem_, serializer_.get(), sfd_->get(),
+ /*offset=*/0, kBlockSize, kPostingListBytes));
+ EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/posting-list-accessor.cc b/icing/file/posting_list/posting-list-accessor.cc
new file mode 100644
index 0000000..a7cdb17
--- /dev/null
+++ b/icing/file/posting_list/posting-list-accessor.cc
@@ -0,0 +1,136 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/posting-list-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status PostingListAccessor::FlushPreexistingPostingList() {
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ // If this is a max-sized posting list, then sync to disk and keep track of
+ // the id.
+ ICING_RETURN_IF_ERROR(
+ storage_->WritePostingListToDisk(*preexisting_posting_list_));
+ prev_block_identifier_ = preexisting_posting_list_->id;
+ } else {
+ // If this is NOT a max-sized posting list, then our data have outgrown this
+ // particular posting list. Move the data into the in-memory posting list
+ // and free this posting list.
+ //
+ // Move will always succeed since in_memory_posting_list_ is max_pl_bytes.
+ ICING_RETURN_IF_ERROR(GetSerializer()->MoveFrom(
+ /*dst=*/&in_memory_posting_list_,
+ /*src=*/&preexisting_posting_list_->posting_list));
+
+ // Now that all the contents of this posting list have been copied, there's
+ // no more use for it. Make it available to be used for another posting
+ // list.
+ ICING_RETURN_IF_ERROR(
+ storage_->FreePostingList(std::move(*preexisting_posting_list_)));
+ }
+ preexisting_posting_list_.reset();
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PostingListAccessor::FlushInMemoryPostingList() {
+ // We exceeded max_pl_bytes(). Need to flush in_memory_posting_list_ and
+ // update the chain.
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage_->AllocateAndChainMaxSizePostingList(
+ prev_block_identifier_.block_index()));
+ ICING_RETURN_IF_ERROR(
+ GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
+ /*src=*/&in_memory_posting_list_));
+ ICING_RETURN_IF_ERROR(storage_->WritePostingListToDisk(holder));
+
+ // Set prev block id only if persist to disk succeeded.
+ prev_block_identifier_ = holder.id;
+ return libtextclassifier3::Status::OK;
+}
+
+PostingListAccessor::FinalizeResult PostingListAccessor::Finalize() && {
+ if (preexisting_posting_list_ != nullptr) {
+ // Sync to disk.
+ return FinalizeResult(
+ storage_->WritePostingListToDisk(*preexisting_posting_list_),
+ preexisting_posting_list_->id);
+ }
+
+ if (GetSerializer()->GetBytesUsed(&in_memory_posting_list_) <= 0) {
+ return FinalizeResult(absl_ports::InvalidArgumentError(
+ "Can't finalize an empty PostingListAccessor. "
+ "There's nothing to Finalize!"),
+ PostingListIdentifier::kInvalid);
+ }
+
+ libtextclassifier3::StatusOr<PostingListHolder> holder_or;
+ if (prev_block_identifier_.is_valid()) {
+ // If prev_block_identifier_ is valid, then it means there was a max-sized
+ // posting list, so we have to allocate another new max size posting list
+ // and chain them together.
+ holder_or = storage_->AllocateAndChainMaxSizePostingList(
+ prev_block_identifier_.block_index());
+ } else {
+ // Otherwise, it is the first posting list, and we can use smaller size pl.
+ // Note that even if it needs a max-sized posting list here, it is ok to
+ // call AllocatePostingList without setting next block index since we don't
+ // have any previous posting list to chain and AllocatePostingList will set
+ // next block index to kInvalidBlockIndex.
+ uint32_t posting_list_bytes =
+ GetSerializer()->GetMinPostingListSizeToFit(&in_memory_posting_list_);
+ holder_or = storage_->AllocatePostingList(posting_list_bytes);
+ }
+
+ if (!holder_or.ok()) {
+ return FinalizeResult(std::move(holder_or).status(),
+ prev_block_identifier_);
+ }
+ PostingListHolder holder = std::move(holder_or).ValueOrDie();
+
+  // Move to the allocated area. This should never actually return an error.
+  // We know that holder.posting_list is valid because it wouldn't have been
+  // returned successfully by AllocatePostingList if it weren't. We know
+  // in_memory_posting_list_ is valid because we created it in-memory. And
+  // finally, we know that the data from in_memory_posting_list_ will fit in
+  // holder.posting_list because we requested that it be at least
+  // posting_list_bytes large.
+ auto status = GetSerializer()->MoveFrom(/*dst=*/&holder.posting_list,
+ /*src=*/&in_memory_posting_list_);
+ if (!status.ok()) {
+ return FinalizeResult(std::move(status), prev_block_identifier_);
+ }
+
+ status = storage_->WritePostingListToDisk(holder);
+ if (!status.ok()) {
+ return FinalizeResult(std::move(status), prev_block_identifier_);
+ }
+ return FinalizeResult(libtextclassifier3::Status::OK, holder.id);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/posting-list-accessor.h b/icing/file/posting_list/posting-list-accessor.h
new file mode 100644
index 0000000..91f1f2d
--- /dev/null
+++ b/icing/file/posting_list/posting-list-accessor.h
@@ -0,0 +1,118 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_ACCESSOR_H_
+#define ICING_FILE_POSTING_LIST_POSTING_LIST_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+
+namespace icing {
+namespace lib {
+
+// This class serves to:
+// 1. Expose PostingListUseds to clients of FlashIndexStorage
+// 2. Handle flushing posting lists properly, including choosing the most
+//    efficient PL size, chaining max-sized PLs correctly, etc.
+// 3. Ensure that PostingListUseds can only be freed by calling methods which
+// will also properly maintain the FlashIndexStorage free list and prevent
+// callers from modifying the Posting List after freeing.
+class PostingListAccessor {
+ public:
+ virtual ~PostingListAccessor() = default;
+
+ struct FinalizeResult {
+ // - OK on success
+ // - INVALID_ARGUMENT if there was no pre-existing posting list and no
+ // data were added
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a
+ // new posting list.
+ libtextclassifier3::Status status;
+ // Id of the posting list chain that was finalized. Guaranteed to be valid
+ // if status is OK. May be valid if status is non-OK, but previous blocks
+ // were written.
+ PostingListIdentifier id;
+
+ explicit FinalizeResult(libtextclassifier3::Status status_in,
+ PostingListIdentifier id_in)
+ : status(std::move(status_in)), id(std::move(id_in)) {}
+ };
+ // Write all accumulated data to storage.
+ //
+  // If the accessor points to a posting list chain with multiple posting
+  // lists and is unable to write the last posting list in the chain, Finalize
+  // will return the error and also populate id with the id of the
+  // second-to-last posting list.
+ FinalizeResult Finalize() &&;
+
+ virtual PostingListSerializer* GetSerializer() = 0;
+
+ protected:
+ explicit PostingListAccessor(FlashIndexStorage* storage,
+ PostingListUsed in_memory_posting_list)
+ : storage_(storage),
+ prev_block_identifier_(PostingListIdentifier::kInvalid),
+ in_memory_posting_list_(std::move(in_memory_posting_list)),
+ has_reached_posting_list_chain_end_(false) {}
+
+ // Flushes preexisting_posting_list_ to disk if it's a max-sized posting list
+ // and populates prev_block_identifier.
+ // If it's not a max-sized posting list, moves the contents of
+ // preexisting_posting_list_ to in_memory_posting_list_ and frees
+ // preexisting_posting_list_.
+ // Sets preexisting_posting_list_ to nullptr.
+ libtextclassifier3::Status FlushPreexistingPostingList();
+
+ // Flushes in_memory_posting_list_ to a max-sized posting list on disk, chains
+ // the newly allocated max-size posting list block by setting its next pointer
+ // to prev_block_identifier_, and updates prev_block_identifier_ to point to
+ // the newly allocated posting list.
+ libtextclassifier3::Status FlushInMemoryPostingList();
+
+ // Frees all posting lists in the posting list chain starting at
+ // prev_block_identifier_.
+ libtextclassifier3::Status FreePostingListChain();
+
+ FlashIndexStorage* storage_; // Does not own.
+
+ // The PostingListIdentifier of the first max-sized posting list in the
+ // posting list chain or PostingListIdentifier::kInvalid if there is no
+ // posting list chain.
+ PostingListIdentifier prev_block_identifier_;
+
+ // An editor to an existing posting list on disk. If available (non-NULL),
+ // we'll try to add all data to this posting list. Once this posting list
+ // fills up, we'll either 1) chain it (if a max-sized posting list) and put
+ // future data in in_memory_posting_list_ or 2) copy all of its data into
+ // in_memory_posting_list_ and free this pl (if not a max-sized posting list).
+ // TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using
+ // existing posting lists has on latency.
+ std::unique_ptr<PostingListHolder> preexisting_posting_list_;
+
+ // In-memory posting list used to buffer data before writing them to the
+ // smallest on-disk posting list that will fit them.
+ PostingListUsed in_memory_posting_list_;
+
+ bool has_reached_posting_list_chain_end_;
+};
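+
+// A minimal sketch of how a concrete accessor might look and be used. The
+// subclass name, its member, and its data-prepending API are hypothetical and
+// shown for illustration only; only GetSerializer() is required here:
+//
+//   class HitListAccessor : public PostingListAccessor {
+//    public:
+//     PostingListSerializer* GetSerializer() override { return serializer_; }
+//     ...
+//   };
+//
+//   HitListAccessor accessor = ...;
+//   ...prepend data via the subclass's own API...
+//   PostingListAccessor::FinalizeResult result =
+//       std::move(accessor).Finalize();  // Finalize() requires an rvalue.
+//   if (result.status.ok()) {
+//     PostingListIdentifier id = result.id;  // persist this id
+//   }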
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_POSTING_LIST_ACCESSOR_H_
diff --git a/icing/file/posting_list/posting-list-common.h b/icing/file/posting_list/posting-list-common.h
new file mode 100644
index 0000000..44c6dd2
--- /dev/null
+++ b/icing/file/posting_list/posting-list-common.h
@@ -0,0 +1,33 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_COMMON_H_
+#define ICING_FILE_POSTING_LIST_POSTING_LIST_COMMON_H_
+
+#include <cstdint>
+
+namespace icing {
+namespace lib {
+
+// A FlashIndexBlock can contain multiple posting lists. This specifies which
+// PostingList in the FlashIndexBlock we want to refer to.
+using PostingListIndex = int32_t;
+inline constexpr PostingListIndex kInvalidPostingListIndex = ~0U;
+
+inline constexpr uint32_t kInvalidBlockIndex = 0;
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_POSTING_LIST_COMMON_H_
diff --git a/icing/index/posting-list-free.h b/icing/file/posting_list/posting-list-free.h
index a2eba82..073e344 100644
--- a/icing/index/posting-list-free.h
+++ b/icing/file/posting_list/posting-list-free.h
@@ -12,30 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_POSTING_LIST_FREE_H_
-#define ICING_INDEX_POSTING_LIST_FREE_H_
-
-#include <string.h>
-#include <sys/mman.h>
+#ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_FREE_H_
+#define ICING_FILE_POSTING_LIST_POSTING_LIST_FREE_H_
#include <cstdint>
+#include <cstring>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/index/hit/hit.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-utils.h"
#include "icing/legacy/core/icing-string-util.h"
-#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-// A FlashIndexBlock can contain multiple posting lists. This specifies which
-// PostingList in the FlashIndexBlock we want to refer to.
-using PostingListIndex = uint32_t;
-inline constexpr PostingListIndex kInvalidPostingListIndex = ~0U;
-
// A posting list in the index block's free list.
//
// We re-use the first sizeof(PostingListIndex) bytes of the posting list
@@ -51,14 +43,17 @@ class PostingListFree {
//
// RETURNS:
// - A valid PostingListFree on success
- // - INVALID_ARGUMENT if size_in_bytes < min_posting_list_size()
- // || size_in_bytes % sizeof(Hit) != 0.
+ // - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
+ // fails
// - FAILED_PRECONDITION if posting_list_buffer is null
static libtextclassifier3::StatusOr<PostingListFree>
- CreateFromPreexistingPostingListFreeRegion(void *posting_list_buffer,
- uint32_t size_in_bytes) {
+ CreateFromPreexistingPostingListFreeRegion(void* posting_list_buffer,
+ uint32_t size_in_bytes,
+ uint32_t data_type_bytes,
+ uint32_t min_posting_list_size) {
ICING_RETURN_ERROR_IF_NULL(posting_list_buffer);
- if (!posting_list_utils::IsValidPostingListSize(size_in_bytes)) {
+ if (!posting_list_utils::IsValidPostingListSize(
+ size_in_bytes, data_type_bytes, min_posting_list_size)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Requested posting list size %d is invalid!", size_in_bytes));
}
@@ -74,15 +69,17 @@ class PostingListFree {
//
// RETURNS:
// - A valid PostingListFree on success
- // - INVALID_ARGUMENT if size_in_bytes < min_size() || size_in_bytes %
- // sizeof(Hit) != 0.
+ // - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
+ // fails
// - FAILED_PRECONDITION if posting_list_buffer is null
static libtextclassifier3::StatusOr<PostingListFree>
- CreateFromUnitializedRegion(void *posting_list_buffer,
- uint32_t size_in_bytes) {
+ CreateFromUnitializedRegion(void* posting_list_buffer, uint32_t size_in_bytes,
+ uint32_t data_type_bytes,
+ uint32_t min_posting_list_size) {
ICING_ASSIGN_OR_RETURN(PostingListFree posting_list_free,
CreateFromPreexistingPostingListFreeRegion(
- posting_list_buffer, size_in_bytes));
+ posting_list_buffer, size_in_bytes,
+ data_type_bytes, min_posting_list_size));
posting_list_free.Clear();
return posting_list_free;
}
@@ -101,8 +98,8 @@ class PostingListFree {
}
private:
- PostingListFree(void *posting_list_buffer, uint32_t size_in_bytes)
- : posting_list_buffer_(static_cast<uint8_t *>(posting_list_buffer)),
+ explicit PostingListFree(void* posting_list_buffer, uint32_t size_in_bytes)
+ : posting_list_buffer_(static_cast<uint8_t*>(posting_list_buffer)),
size_in_bytes_(size_in_bytes) {}
// Reset the current free posting list as unchained free posting list so that
@@ -114,16 +111,11 @@ class PostingListFree {
// A byte array of size size_in_bytes_. The first sizeof(PostingListIndex)
// bytes which will store the next posting list index, the rest are unused and
// can be anything.
- uint8_t *posting_list_buffer_;
- uint32_t size_in_bytes_;
-
- static_assert(sizeof(PostingListIndex) <=
- posting_list_utils::min_posting_list_size(),
- "PostingListIndex must be small enough to fit in a "
- "minimum-sized Posting List.");
+ uint8_t* posting_list_buffer_;
+ [[maybe_unused]] uint32_t size_in_bytes_;
};
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_POSTING_LIST_FREE_H_
+#endif // ICING_FILE_POSTING_LIST_POSTING_LIST_FREE_H_
diff --git a/icing/index/posting-list-free_test.cc b/icing/file/posting_list/posting-list-free_test.cc
index 80b8957..503012d 100644
--- a/icing/index/posting-list-free_test.cc
+++ b/icing/file/posting_list/posting-list-free_test.cc
@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/posting-list-free.h"
+#include "icing/file/posting_list/posting-list-free.h"
#include <cstdint>
#include <memory>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gtest/gtest.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/testing/common-matchers.h"
namespace icing {
@@ -27,55 +27,76 @@ namespace lib {
namespace {
+// TODO(b/249829533): test different serializers
+
TEST(PostingListTest, PostingListFree) {
+ PostingListHitSerializer serializer;
static const size_t kHitsSize = 2551 * sizeof(Hit);
std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListFree pl_free,
PostingListFree::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
+ static_cast<void *>(hits_buf.get()), kHitsSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()));
EXPECT_EQ(pl_free.get_next_posting_list_index(), kInvalidPostingListIndex);
}
TEST(PostingListTest, PostingListTooSmallInvalidArgument) {
- static const size_t kHitSizeTooSmall =
- posting_list_utils::min_posting_list_size() - sizeof(Hit);
+ PostingListHitSerializer serializer;
+ const size_t kHitSizeTooSmall =
+ serializer.GetMinPostingListSize() - sizeof(Hit);
std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitSizeTooSmall);
- EXPECT_THAT(PostingListFree::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitSizeTooSmall),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(PostingListFree::CreateFromPreexistingPostingListFreeRegion(
- static_cast<void *>(hits_buf.get()), kHitSizeTooSmall),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ PostingListFree::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf.get()), kHitSizeTooSmall,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ PostingListFree::CreateFromPreexistingPostingListFreeRegion(
+ static_cast<void *>(hits_buf.get()), kHitSizeTooSmall,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(PostingListTest, PostingListNotAlignedInvalidArgument) {
- static const size_t kHitSizeNotAligned =
- posting_list_utils::min_posting_list_size() + 1;
+ PostingListHitSerializer serializer;
+ const size_t kHitSizeNotAligned = serializer.GetMinPostingListSize() + 1;
std::unique_ptr<char[]> hits_buf =
std::make_unique<char[]>(kHitSizeNotAligned);
- EXPECT_THAT(PostingListFree::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitSizeNotAligned),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(PostingListFree::CreateFromPreexistingPostingListFreeRegion(
- static_cast<void *>(hits_buf.get()), kHitSizeNotAligned),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ PostingListFree::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf.get()), kHitSizeNotAligned,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ PostingListFree::CreateFromPreexistingPostingListFreeRegion(
+ static_cast<void *>(hits_buf.get()), kHitSizeNotAligned,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(PostingListTest, PostingListNullBufferFailedPrecondition) {
- static const size_t kHitSize = posting_list_utils::min_posting_list_size();
- EXPECT_THAT(PostingListFree::CreateFromUnitializedRegion(
- /*posting_list_buffer=*/nullptr, kHitSize),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(PostingListFree::CreateFromPreexistingPostingListFreeRegion(
- /*posting_list_buffer=*/nullptr, kHitSize),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ PostingListHitSerializer serializer;
+ const size_t kHitSize = serializer.GetMinPostingListSize();
+
+ // nullptr posting_list_buffer
+ EXPECT_THAT(
+ PostingListFree::CreateFromUnitializedRegion(
+ /*posting_list_buffer=*/nullptr, kHitSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ PostingListFree::CreateFromPreexistingPostingListFreeRegion(
+ /*posting_list_buffer=*/nullptr, kHitSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST(PostingListTest, PostingListFreePreexistingRegion) {
+ PostingListHitSerializer serializer;
constexpr PostingListIndex kOtherPostingListIndex = 12;
static const size_t kHitsSize = 2551 * sizeof(Hit);
@@ -85,7 +106,8 @@ TEST(PostingListTest, PostingListFreePreexistingRegion) {
ICING_ASSERT_OK_AND_ASSIGN(
PostingListFree pl_free,
PostingListFree::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
+ static_cast<void *>(hits_buf.get()), kHitsSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()));
pl_free.set_next_posting_list_index(kOtherPostingListIndex);
EXPECT_EQ(pl_free.get_next_posting_list_index(), kOtherPostingListIndex);
}
@@ -95,12 +117,14 @@ TEST(PostingListTest, PostingListFreePreexistingRegion) {
ICING_ASSERT_OK_AND_ASSIGN(
PostingListFree pl_free,
PostingListFree::CreateFromPreexistingPostingListFreeRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
+ static_cast<void *>(hits_buf.get()), kHitsSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()));
EXPECT_EQ(pl_free.get_next_posting_list_index(), kOtherPostingListIndex);
}
}
TEST(PostingListTest, PostingListFreeUninitializedRegion) {
+ PostingListHitSerializer serializer;
constexpr PostingListIndex kOtherPostingListIndex = 12;
static const size_t kHitsSize = 2551 * sizeof(Hit);
@@ -110,7 +134,8 @@ TEST(PostingListTest, PostingListFreeUninitializedRegion) {
ICING_ASSERT_OK_AND_ASSIGN(
PostingListFree pl_free,
PostingListFree::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
+ static_cast<void *>(hits_buf.get()), kHitsSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()));
pl_free.set_next_posting_list_index(kOtherPostingListIndex);
EXPECT_EQ(pl_free.get_next_posting_list_index(), kOtherPostingListIndex);
}
@@ -120,7 +145,8 @@ TEST(PostingListTest, PostingListFreeUninitializedRegion) {
ICING_ASSERT_OK_AND_ASSIGN(
PostingListFree pl_free,
PostingListFree::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
+ static_cast<void *>(hits_buf.get()), kHitsSize,
+ serializer.GetDataTypeBytes(), serializer.GetMinPostingListSize()));
EXPECT_EQ(pl_free.get_next_posting_list_index(), kInvalidPostingListIndex);
}
}
diff --git a/icing/file/posting_list/posting-list-identifier.cc b/icing/file/posting_list/posting-list-identifier.cc
new file mode 100644
index 0000000..4491c38
--- /dev/null
+++ b/icing/file/posting_list/posting-list-identifier.cc
@@ -0,0 +1,27 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/posting-list-identifier.h"
+
+#include "icing/file/posting_list/posting-list-common.h"
+
+namespace icing {
+namespace lib {
+
+PostingListIdentifier PostingListIdentifier::kInvalid(
+ kInvalidBlockIndex, /*posting_list_index=*/0,
+ PostingListIdentifier::kEncodedPostingListIndexBits - 1);
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/posting-list-identifier.h b/icing/file/posting_list/posting-list-identifier.h
new file mode 100644
index 0000000..8a0229b
--- /dev/null
+++ b/icing/file/posting_list/posting-list-identifier.h
@@ -0,0 +1,120 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_
+#define ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_
+
+#include <cstdint>
+
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/legacy/index/icing-bit-util.h"
+
+namespace icing {
+namespace lib {
+
+// 1M blocks * 4K page size = 4GB index
+inline constexpr int kBlockIndexBits = 20;
+inline constexpr int kMaxBlockIndex = (1u << kBlockIndexBits) - 1;
+
+// Class used to store information necessary to identify any posting list within
+// the index.
+//
+// The 20 leftmost bits in this identifier encode the block index. The 12
+// rightmost bits encode both the posting list index and the maximum number of
+// bits required to encode a posting list index on that block.
+//
+// Ex. An index block containing a max of 68 posting lists each of size 60
+// bytes (and thus 7 posting list bits), with a block index of 13 and a posting
+// list index of 5.
+// 0000 0000 0000 0000 1101 1111 0000 0101
+// |__________block-index_______|__pad__|_pl-index_|
+//
+// "pad" is some region starting at kEncodedPostingListIndexBits (12) bit and
+// continuing rightward until reaching a terminating "0". This padding encodes
+// the posting list bits value - posting list bits value is the number of bits
+// after the terminating '0' of the "pad" region.
+//
+// This value will eventually be stored in the Main Lexicon.
+class PostingListIdentifier {
+ // 1 bit is wasted to encode max pl index bits so there can be at most 2^11
+ // posting lists per block. Block size would have to be >=40020 bytes for
+  // there to be more than 2K posting lists in a block.
+ static constexpr int kEncodedPostingListIndexBits = 12;
+ static_assert(kEncodedPostingListIndexBits + kBlockIndexBits <=
+ 8 * sizeof(uint32_t),
+ "Not enough room in PostingListIdentifier value to encode "
+ "block index and posting list index.");
+
+ public:
+ static PostingListIdentifier kInvalid;
+
+ explicit PostingListIdentifier() { *this = kInvalid; }
+
+ // 1. block_index - the index of this block within the FlashIndexStorage file
+ // 2. posting_list_index - the index of this posting list within the block
+ // 3. posting_list_index_bits - the number of bits needed to encode the
+ // largest posting_list_index that this block can have.
+ explicit PostingListIdentifier(uint32_t block_index,
+ PostingListIndex posting_list_index,
+ int posting_list_index_bits) {
+ val_ = 0;
+ BITFIELD_OR(val_, /*offset=*/0, /*len=*/posting_list_index_bits,
+ /*val=*/static_cast<uint64_t>(posting_list_index));
+ BITFIELD_OR(
+ val_, /*offset=*/posting_list_index_bits + 1,
+ /*len=*/kEncodedPostingListIndexBits - posting_list_index_bits - 1,
+ /*val=*/~0u);
+ BITFIELD_OR(val_, /*offset=*/kEncodedPostingListIndexBits,
+ /*len=*/kBlockIndexBits,
+ /*val=*/block_index);
+ }
+
+ uint32_t block_index() const {
+ return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits);
+ }
+
+ PostingListIndex posting_list_index() const {
+ return BITFIELD_GET(val_, 0, posting_list_index_bits());
+ }
+
+ // Returns the maximum number of bits that a posting list index on the block
+ // referred to by block_index could use.
+ int posting_list_index_bits() const {
+ for (int bits = kEncodedPostingListIndexBits - 1; bits >= 0; --bits) {
+ if (((1u << bits) & val_) == 0) {
+ // Got to the zero bit. This is the start of pl index.
+ return bits;
+ }
+ }
+ return -1;
+ }
+
+ bool is_valid() const { return *this != kInvalid; }
+
+ bool operator==(const PostingListIdentifier& rhs) const {
+ return val_ == rhs.val_;
+ }
+ bool operator!=(const PostingListIdentifier& rhs) const {
+ return !(*this == rhs);
+ }
+
+ private:
+ uint32_t val_;
+} __attribute__((packed));
+static_assert(sizeof(PostingListIdentifier) == 4, "");
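+
+// Worked example (values derived from the layout comment above, not from any
+// additional API):
+//
+//   PostingListIdentifier id(/*block_index=*/13, /*posting_list_index=*/5,
+//                            /*posting_list_index_bits=*/7);
+//   // Raw value: 13 << 12 | 0b1111'0000'0101 == 0x0000DF05.
+//   // id.block_index() == 13, id.posting_list_index() == 5, and
+//   // id.posting_list_index_bits() == 7 (bit 7 is the terminating '0').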
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_
diff --git a/icing/file/posting_list/posting-list-used.cc b/icing/file/posting_list/posting-list-used.cc
new file mode 100644
index 0000000..d049b13
--- /dev/null
+++ b/icing/file/posting_list/posting-list-used.cc
@@ -0,0 +1,58 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/posting_list/posting-list-used.h"
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/posting-list-utils.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<PostingListUsed>
+PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
+ PostingListSerializer* serializer,
+ std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes) {
+ ICING_RETURN_ERROR_IF_NULL(serializer);
+ ICING_RETURN_ERROR_IF_NULL(posting_list_buffer);
+
+ if (!posting_list_utils::IsValidPostingListSize(
+ size_in_bytes, serializer->GetDataTypeBytes(),
+ serializer->GetMinPostingListSize())) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Requested posting list size %d is invalid!", size_in_bytes));
+ }
+ return PostingListUsed(std::move(posting_list_buffer), size_in_bytes);
+}
+
+libtextclassifier3::StatusOr<PostingListUsed>
+PostingListUsed::CreateFromUnitializedRegion(PostingListSerializer* serializer,
+ uint32_t size_in_bytes) {
+ ICING_ASSIGN_OR_RETURN(
+ PostingListUsed posting_list_used,
+ CreateFromPreexistingPostingListUsedRegion(
+ serializer, std::make_unique<uint8_t[]>(size_in_bytes),
+ size_in_bytes));
+ serializer->Clear(&posting_list_used);
+ return posting_list_used;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/posting_list/posting-list-used.h b/icing/file/posting_list/posting-list-used.h
new file mode 100644
index 0000000..980d210
--- /dev/null
+++ b/icing/file/posting_list/posting-list-used.h
@@ -0,0 +1,174 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
+#define ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+class PostingListUsed;
+
+// Interface for PostingListUsed data serialization and deserialization.
+// - It contains several common methods used by lower level of posting list
+// management related classes (e.g. FlashIndexStorage, IndexBlock,
+// PostingListUsed, etc).
+// - Higher level classes (e.g. MainIndex) create their desired serializers
+// according to the data type they're dealing with, and pass the instance down
+// to all posting list management related classes.
+// - Data specific methods can also be implemented in each serializer. They
+// won't be used by posting list management related classes, but higher level
+// classes are able to call it and deal with the specific data type.
+//
+// E.g. main index stores 'Hit' data into posting lists.
+// - MainIndex creates PostingListUsedHitSerializer instance and uses hit data
+// related methods to serialize/deserialize Hit data to/from posting lists.
+// - FlashIndexStorage, IndexBlock, PostingListUsed use the serializer created
+// by MainIndex, but hold the reference/pointer in the interface format
+// (PostingListSerializer) and only use common interface methods to manage
+// posting list.
+class PostingListSerializer {
+ public:
+ // Special data is either a DataType instance or data_start_offset.
+ template <typename DataType>
+ union SpecialData {
+ explicit SpecialData(const DataType& data) : data_(data) {}
+
+ explicit SpecialData(uint32_t data_start_offset)
+ : data_start_offset_(data_start_offset) {}
+
+ const DataType& data() const { return data_; }
+
+ uint32_t data_start_offset() const { return data_start_offset_; }
+ void set_data_start_offset(uint32_t data_start_offset) {
+ data_start_offset_ = data_start_offset;
+ }
+
+ private:
+ DataType data_;
+ uint32_t data_start_offset_;
+ } __attribute__((packed));
+
+ static constexpr uint32_t kNumSpecialData = 2;
+
+ virtual ~PostingListSerializer() = default;
+
+ // Returns byte size of the data type.
+ virtual uint32_t GetDataTypeBytes() const = 0;
+
+ // Returns minimum posting list size allowed.
+ //
+ // Note that min posting list size should also be large enough to store a
+ // single PostingListIndex (for posting list management usage), so we have to
+ // add static_assert in each serializer implementation.
+ // E.g.
+ // static constexpr uint32_t kMinPostingListSize = kSpecialHitsSize;
+ // static_assert(sizeof(PostingListIndex) <= kMinPostingListSize, "");
+ virtual uint32_t GetMinPostingListSize() const = 0;
+
+ // Returns minimum size of posting list that can fit these used bytes
+ // (see MoveFrom).
+ virtual uint32_t GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const = 0;
+
+ // Returns bytes used by actual data.
+ virtual uint32_t GetBytesUsed(
+ const PostingListUsed* posting_list_used) const = 0;
+
+ // Clears the posting list. It is usually used for initializing a newly
+ // allocated (or reclaimed from free posting list chain) posting list.
+ virtual void Clear(PostingListUsed* posting_list_used) const = 0;
+
+ // Moves contents from posting list 'src' to 'dst'. Clears 'src'.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT if 'src' is not valid or 'src' is too large to fit in
+ // 'dst'.
+ // - FAILED_PRECONDITION if 'dst' posting list is in a corrupted state.
+ virtual libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
+ PostingListUsed* src) const = 0;
+};
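+
+// A hypothetical fixed-width serializer sketch (the class name and data type
+// are assumed for illustration, not part of this file), showing the minimum
+// an implementation must provide:
+//
+//   class Int64Serializer : public PostingListSerializer {
+//    public:
+//     uint32_t GetDataTypeBytes() const override { return sizeof(int64_t); }
+//     uint32_t GetMinPostingListSize() const override {
+//       // Large enough for the two special data slots (and therefore for a
+//       // PostingListIndex, as required above).
+//       return kNumSpecialData * sizeof(SpecialData<int64_t>);
+//     }
+//     ...  // GetMinPostingListSizeToFit, GetBytesUsed, Clear, MoveFrom
+//   };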
+
+// A posting list with in-memory data. The caller should sync it to disk via
+// FlashIndexStorage. Layout depends on the serializer.
+class PostingListUsed {
+ public:
+ // Creates a PostingListUsed that takes over the ownership of
+ // posting_list_buffer with size_in_bytes bytes. 'Preexisting' means that
+ // the data in posting_list_buffer was previously modified by another instance
+ // of PostingListUsed, and the caller should read the data from disk to
+ // posting_list_buffer.
+ //
+ // RETURNS:
+ // - A valid PostingListUsed if successful
+ // - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
+ // fails
+ // - FAILED_PRECONDITION if serializer or posting_list_buffer is null
+ static libtextclassifier3::StatusOr<PostingListUsed>
+ CreateFromPreexistingPostingListUsedRegion(
+ PostingListSerializer* serializer,
+ std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes);
+
+ // Creates a PostingListUsed that owns a buffer of size_in_bytes bytes and
+ // initializes the content of the buffer so that the returned PostingListUsed
+ // is empty.
+ //
+ // RETURNS:
+ // - A valid PostingListUsed if successful
+ // - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
+ // fails
+ // - FAILED_PRECONDITION if serializer is null
+ static libtextclassifier3::StatusOr<PostingListUsed>
+ CreateFromUnitializedRegion(PostingListSerializer* serializer,
+ uint32_t size_in_bytes);
+
+ uint8_t* posting_list_buffer() {
+ is_dirty_ = true;
+ return posting_list_buffer_.get();
+ }
+
+ const uint8_t* posting_list_buffer() const {
+ return posting_list_buffer_.get();
+ }
+
+ uint32_t size_in_bytes() const { return size_in_bytes_; }
+
+ bool is_dirty() const { return is_dirty_; }
+
+ private:
+ explicit PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer,
+ uint32_t size_in_bytes)
+ : posting_list_buffer_(std::move(posting_list_buffer)),
+ size_in_bytes_(size_in_bytes),
+ is_dirty_(false) {}
+
+ // A byte array of size size_in_bytes_ containing encoded data for this
+ // posting list.
+ std::unique_ptr<uint8_t[]> posting_list_buffer_;
+ uint32_t size_in_bytes_;
+
+ bool is_dirty_;
+};
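+
+// Sketch of typical in-memory use, assuming the hit serializer (see
+// icing/index/main/posting-list-hit-serializer.h) and a 60-byte posting list;
+// any size accepted by posting_list_utils::IsValidPostingListSize works:
+//
+//   PostingListHitSerializer serializer;
+//   ICING_ASSIGN_OR_RETURN(
+//       PostingListUsed pl_used,
+//       PostingListUsed::CreateFromUnitializedRegion(&serializer,
+//                                                    /*size_in_bytes=*/60));
+//   ICING_RETURN_IF_ERROR(serializer.PrependHit(&pl_used, hit));
+//   // ...later, write pl_used.posting_list_buffer() to disk via
+//   // FlashIndexStorage.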
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
diff --git a/icing/index/posting-list-utils.cc b/icing/file/posting_list/posting-list-utils.cc
index b0e2929..2adbc26 100644
--- a/icing/index/posting-list-utils.cc
+++ b/icing/file/posting_list/posting-list-utils.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/posting-list-utils.h"
+#include "icing/file/posting_list/posting-list-utils.h"
#include "icing/legacy/index/icing-bit-util.h"
#include "icing/util/logging.h"
@@ -22,27 +22,28 @@ namespace lib {
namespace posting_list_utils {
-bool IsValidPostingListSize(uint32_t size_in_bytes) {
- // size must be sizeof(Hit) aligned. Otherwise, we can have serious
+bool IsValidPostingListSize(uint32_t size_in_bytes, uint32_t data_type_bytes,
+ uint32_t min_posting_list_size) {
+ // size must be data_type_bytes aligned. Otherwise, we can have serious
// wasted space in the worst case.
- if (size_in_bytes % sizeof(Hit) != 0) {
- ICING_LOG(ERROR) << "Size " << size_in_bytes << " hit " << sizeof(Hit);
+ if (size_in_bytes % data_type_bytes != 0) {
+ ICING_LOG(ERROR) << "Size " << size_in_bytes << " data " << data_type_bytes;
return false;
}
// Must be able to store the min information.
- if (size_in_bytes < min_posting_list_size()) {
+ if (size_in_bytes < min_posting_list_size) {
ICING_LOG(ERROR) << "Size " << size_in_bytes << " is less than min size "
- << min_posting_list_size();
+ << min_posting_list_size;
return false;
}
- // We re-use the first two hits as pointers into the posting list
- // so the posting list size must fit in sizeof(Hit).
- if (BitsToStore(size_in_bytes) > sizeof(Hit::Value) * 8) {
+  // We re-use the first two data items as pointers into the posting list,
+  // so the posting list size must fit in data_type_bytes.
+ if (BitsToStore(size_in_bytes) > data_type_bytes * 8) {
ICING_LOG(ERROR)
<< "Posting list size must be small enough to store the offset in "
- << sizeof(Hit::Value) * 8 << " bytes.";
+ << data_type_bytes << " bytes.";
return false;
}
diff --git a/icing/index/posting-list-utils.h b/icing/file/posting_list/posting-list-utils.h
index fc90d64..6a1e28c 100644
--- a/icing/index/posting-list-utils.h
+++ b/icing/file/posting_list/posting-list-utils.h
@@ -12,33 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_POSTING_LIST_UTILS_H_
-#define ICING_INDEX_POSTING_LIST_UTILS_H_
+#ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_UTILS_H_
+#define ICING_FILE_POSTING_LIST_POSTING_LIST_UTILS_H_
#include <cstdint>
-#include "icing/index/hit/hit.h"
-
namespace icing {
namespace lib {
namespace posting_list_utils {
-// Represents the byte length of the two special hits described
-// in the private section of posting-list-used.h.
-static constexpr uint32_t kSpecialHitsSize = sizeof(Hit) * 2;
-
-constexpr uint32_t min_posting_list_size() { return kSpecialHitsSize; }
-
// For a posting list size to be valid, it must:
-// 1) be sizeof(Hit) aligned
+// 1) be data_type_bytes aligned
// 2) be equal to or larger than min_posting_list_size
-// 3) be small enough to be encoded within a single Hit (5 bytes)
-bool IsValidPostingListSize(uint32_t size_in_bytes);
+// 3) be small enough to be encoded within a single data item (data_type_bytes)
+bool IsValidPostingListSize(uint32_t size_in_bytes, uint32_t data_type_bytes,
+ uint32_t min_posting_list_size);
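+//
+// E.g. for hits (data_type_bytes == sizeof(Hit) == 5 and
+// min_posting_list_size == 10, matching the removed Hit-specific constants):
+// size 60 is valid, 61 violates 1), and 5 violates 2).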
} // namespace posting_list_utils
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_POSTING_LIST_UTILS_H_
+#endif // ICING_FILE_POSTING_LIST_POSTING_LIST_UTILS_H_
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc
new file mode 100644
index 0000000..dd233e0
--- /dev/null
+++ b/icing/file/version-util.cc
@@ -0,0 +1,150 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/version-util.h"
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/index.h"
+
+namespace icing {
+namespace lib {
+
+namespace version_util {
+
+libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_path,
+ const std::string& index_base_dir) {
+ // 1. Read the version info.
+ VersionInfo existing_version_info(-1, -1);
+ if (filesystem.FileExists(version_file_path.c_str()) &&
+ !filesystem.PRead(version_file_path.c_str(), &existing_version_info,
+ sizeof(VersionInfo), /*offset=*/0)) {
+    return absl_ports::InternalError("Failed to read version");
+ }
+
+ // 2. Check the Index magic to see if we're actually on version 0.
+ libtextclassifier3::StatusOr<int> existing_flash_index_magic_or =
+ Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
+ if (!existing_flash_index_magic_or.ok()) {
+ if (absl_ports::IsNotFound(existing_flash_index_magic_or.status())) {
+ // Flash index magic doesn't exist. In this case, we're unable to
+ // determine the version change state correctly (regardless of the
+ // existence of the version file), so invalidate VersionInfo by setting
+ // version to -1, but still keep the max_version value read in step 1.
+ existing_version_info.version = -1;
+ return existing_version_info;
+ }
+ // Real error.
+ return std::move(existing_flash_index_magic_or).status();
+ }
+ if (existing_flash_index_magic_or.ValueOrDie() ==
+ kVersionZeroFlashIndexMagic) {
+ existing_version_info.version = 0;
+ if (existing_version_info.max_version == -1) {
+ existing_version_info.max_version = 0;
+ }
+ }
+
+ return existing_version_info;
+}
+
+libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
+ const std::string& version_file_path,
+ const VersionInfo& version_info) {
+ ScopedFd scoped_fd(filesystem.OpenForWrite(version_file_path.c_str()));
+ if (!scoped_fd.is_valid() ||
+ !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
+ sizeof(VersionInfo)) ||
+ !filesystem.DataSync(scoped_fd.get())) {
+    return absl_ports::InternalError("Failed to write version");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
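+// Maps the persisted version info against the current code version. E.g. an
+// existing {version=2, max_version=3} with curr_version == 3 returns
+// kRollForward: the instance had been on v3 before, rolled back to v2, and is
+// now returning to v3.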
+StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
+ int32_t curr_version) {
+ if (!existing_version_info.IsValid()) {
+ return StateChange::kUndetermined;
+ }
+
+ if (existing_version_info.version == 0) {
+ return (existing_version_info.max_version == existing_version_info.version)
+ ? StateChange::kVersionZeroUpgrade
+ : StateChange::kVersionZeroRollForward;
+ }
+
+ if (existing_version_info.version == curr_version) {
+ return StateChange::kCompatible;
+ } else if (existing_version_info.version > curr_version) {
+ return StateChange::kRollBack;
+ } else { // existing_version_info.version < curr_version
+ return (existing_version_info.max_version == existing_version_info.version)
+ ? StateChange::kUpgrade
+ : StateChange::kRollForward;
+ }
+}
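+// Examples (illustrative only, matching version-util_test.cc):
+//   GetVersionStateChange(VersionInfo(1, 1), /*curr_version=*/2)
+//       => StateChange::kUpgrade      (no newer version has ever run)
+//   GetVersionStateChange(VersionInfo(1, 2), /*curr_version=*/2)
+//       => StateChange::kRollForward  (previously rolled back from 2)
+//   GetVersionStateChange(VersionInfo(2, 2), /*curr_version=*/1)
+//       => StateChange::kRollBack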
+
+bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
+ int32_t curr_version) {
+ StateChange state_change =
+ GetVersionStateChange(existing_version_info, curr_version);
+ switch (state_change) {
+ case StateChange::kCompatible:
+ return false;
+ case StateChange::kUndetermined:
+ [[fallthrough]];
+ case StateChange::kRollBack:
+ [[fallthrough]];
+ case StateChange::kRollForward:
+ [[fallthrough]];
+ case StateChange::kVersionZeroRollForward:
+ [[fallthrough]];
+ case StateChange::kVersionZeroUpgrade:
+ return true;
+ case StateChange::kUpgrade:
+ break;
+ }
+
+ bool should_rebuild = false;
+ int32_t existing_version = existing_version_info.version;
+ while (existing_version < curr_version) {
+ switch (existing_version) {
+ case 1: {
+ // version 1 -> version 2 upgrade, no need to rebuild
+ break;
+ }
+ case 2: {
+ // version 2 -> version 3 upgrade, no need to rebuild
+ break;
+ }
+ default:
+ // This should not happen. Rebuild anyway if unsure.
+ should_rebuild |= true;
+ }
+ ++existing_version;
+ }
+ return should_rebuild;
+}
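+// Example (illustrative): upgrading from version 1 to version 3 walks the
+// encoded path 1 -> 2 -> 3; neither step requires a rebuild, so the function
+// returns false. Any step not covered by the switch above conservatively
+// returns true.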
+
+} // namespace version_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/version-util.h b/icing/file/version-util.h
new file mode 100644
index 0000000..b2d51df
--- /dev/null
+++ b/icing/file/version-util.h
@@ -0,0 +1,115 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_VERSION_UTIL_H_
+#define ICING_FILE_VERSION_UTIL_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+
+namespace icing {
+namespace lib {
+
+namespace version_util {
+
+// - Version 0: Android T base. Can be identified only by flash index magic.
+// - Version 1: Android U base and M-2023-08.
+// - Version 2: M-2023-09, M-2023-11, M-2024-01. Schema is compatible with v1.
+// (There were no M-2023-10, M-2023-12).
+// - Version 3: M-2024-02. Schema is compatible with v1 and v2.
+//
+// LINT.IfChange(kVersion)
+inline static constexpr int32_t kVersion = 3;
+// LINT.ThenChange(//depot/google3/icing/schema/schema-store.cc:min_overlay_version_compatibility)
+inline static constexpr int32_t kVersionOne = 1;
+inline static constexpr int32_t kVersionTwo = 2;
+inline static constexpr int32_t kVersionThree = 3;
+
+inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae;
+
+struct VersionInfo {
+ int32_t version;
+ int32_t max_version;
+
+ explicit VersionInfo(int32_t version_in, int32_t max_version_in)
+ : version(version_in), max_version(max_version_in) {}
+
+ bool IsValid() const { return version >= 0 && max_version >= 0; }
+
+ bool operator==(const VersionInfo& other) const {
+ return version == other.version && max_version == other.max_version;
+ }
+} __attribute__((packed));
+static_assert(sizeof(VersionInfo) == 8, "");
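+// Note: VersionInfo is read and written as raw bytes (see ReadVersion and
+// WriteVersion), so the struct is packed and pinned to 8 bytes to keep the
+// on-disk layout stable.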
+
+enum class StateChange {
+ kUndetermined,
+ kCompatible,
+ kRollForward,
+ kRollBack,
+ kUpgrade,
+ kVersionZeroUpgrade,
+ kVersionZeroRollForward,
+};
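+// Semantics (as implemented in GetVersionStateChange):
+// - kUndetermined: the existing VersionInfo is invalid.
+// - kCompatible: existing version == current version.
+// - kUpgrade: existing version < current version and no newer version has
+//   ever run (max_version == version).
+// - kRollForward: existing version < current version and a newer version has
+//   run before (max_version > version).
+// - kRollBack: existing version > current version.
+// - kVersionZeroUpgrade / kVersionZeroRollForward: as above, but the existing
+//   data is version 0 (identified by the flash index magic).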
+
+// Helper method to read version info (using version file and flash index header
+// magic) from the existing data. If the state is invalid (e.g. flash index
+// header file is missing), then return an invalid VersionInfo.
+//
+// RETURNS:
+// - Existing data's VersionInfo on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_path,
+ const std::string& index_base_dir);
+
+// Helper method to write version file.
+//
+// RETURNS:
+// - OK on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
+ const std::string& version_file_path,
+ const VersionInfo& version_info);
+
+// Helper method to determine the change state between the existing data version
+// and the current code version.
+//
+// REQUIRES: curr_version > 0. Version checking was introduced in version 1,
+// so callers (except unit tests) always pass a version number greater than 0.
+//
+// RETURNS: StateChange
+StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
+ int32_t curr_version = kVersion);
+
+// Helper method to determine whether Icing should rebuild all derived files.
+// Rebuilding derived files is not always required when rolling forward or
+// upgrading. This function "encodes" the known upgrade paths and checks
+// whether the given roll-forward/upgrade requires derived files to be
+// rebuilt.
+//
+// REQUIRES: curr_version > 0. Version checking was introduced in version 1,
+// so callers (except unit tests) always pass a version number greater than 0.
+bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
+ int32_t curr_version = kVersion);
+
+} // namespace version_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_VERSION_UTIL_H_
diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc
new file mode 100644
index 0000000..9dedb1d
--- /dev/null
+++ b/icing/file/version-util_test.cc
@@ -0,0 +1,484 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/version-util.h"
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+namespace version_util {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+struct VersionUtilReadVersionTestParam {
+ std::optional<VersionInfo> existing_version_info;
+ std::optional<int> existing_flash_index_magic;
+ VersionInfo expected_version_info;
+
+ explicit VersionUtilReadVersionTestParam(
+ std::optional<VersionInfo> existing_version_info_in,
+ std::optional<int> existing_flash_index_magic_in,
+ VersionInfo expected_version_info_in)
+ : existing_version_info(std::move(existing_version_info_in)),
+ existing_flash_index_magic(std::move(existing_flash_index_magic_in)),
+ expected_version_info(std::move(expected_version_info_in)) {}
+};
+
+class VersionUtilReadVersionTest
+ : public ::testing::TestWithParam<VersionUtilReadVersionTestParam> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/version_util_test";
+ version_file_path_ = base_dir_ + "/version";
+ index_path_ = base_dir_ + "/index";
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()));
+ }
+
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()));
+ }
+
+ const Filesystem& filesystem() const { return filesystem_; }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string version_file_path_;
+ std::string index_path_;
+};
+
+TEST_P(VersionUtilReadVersionTest, ReadVersion) {
+ const VersionUtilReadVersionTestParam& param = GetParam();
+
+ // Prepare version file and flash index file.
+ if (param.existing_version_info.has_value()) {
+ ICING_ASSERT_OK(WriteVersion(filesystem_, version_file_path_,
+ param.existing_version_info.value()));
+ }
+
+ if (param.existing_flash_index_magic.has_value()) {
+ HeaderBlock header_block(&filesystem_, /*block_size=*/4096);
+ header_block.header()->magic = param.existing_flash_index_magic.value();
+
+ std::string main_index_dir = index_path_ + "/idx/main";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(main_index_dir.c_str()));
+ std::string flash_index_file_path = main_index_dir + "/main_index";
+
+ ScopedFd sfd(filesystem_.OpenForWrite(flash_index_file_path.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ ASSERT_TRUE(header_block.Write(sfd.get()));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ VersionInfo version_info,
+ ReadVersion(filesystem_, version_file_path_, index_path_));
+ EXPECT_THAT(version_info, Eq(param.expected_version_info));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilReadVersionTest, VersionUtilReadVersionTest,
+ testing::Values(
+ // - Version file doesn't exist
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version -1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file doesn't exist
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 0
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/0)),
+
+ // - Version file doesn't exist
+ // - Flash index exists with non version 0 magic
+ // - Result: version -1, max_version -1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file exists
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version 1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/1)),
+
+ // - Version file exists: version 1, max_version 1
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 1
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/1)),
+
+ // - Version file exists: version 2, max_version 3
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 3
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/3),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/3)),
+
+ // - Version file exists: version 1, max_version 1
+ // - Flash index exists with non version 0 magic
+ // - Result: version 1, max_version 1
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/1, /*max_version=*/1)),
+
+ // - Version file exists: version 2, max_version 3
+ // - Flash index exists with non version 0 magic
+ // - Result: version 2, max_version 3
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/3),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/2, /*max_version=*/3))));
+
+struct VersionUtilStateChangeTestParam {
+ VersionInfo existing_version_info;
+ int32_t curr_version;
+ StateChange expected_state_change;
+
+ explicit VersionUtilStateChangeTestParam(VersionInfo existing_version_info_in,
+ int32_t curr_version_in,
+ StateChange expected_state_change_in)
+ : existing_version_info(std::move(existing_version_info_in)),
+ curr_version(curr_version_in),
+ expected_state_change(expected_state_change_in) {}
+};
+
+class VersionUtilStateChangeTest
+ : public ::testing::TestWithParam<VersionUtilStateChangeTestParam> {};
+
+TEST_P(VersionUtilStateChangeTest, GetVersionStateChange) {
+ const VersionUtilStateChangeTestParam& param = GetParam();
+
+ EXPECT_THAT(
+ GetVersionStateChange(param.existing_version_info, param.curr_version),
+ Eq(param.expected_state_change));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilStateChangeTest, VersionUtilStateChangeTest,
+ testing::Values(
+ // - version -1, max_version -1 (invalid)
+ // - Current version = 1
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, -1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version 1 (invalid)
+ // - Current version = 1
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version -1 (invalid)
+ // - Current version = 2
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, -1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version 1 (invalid)
+ // - Current version = 2
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version 0, max_version 0
+ // - Current version = 1
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 0),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade),
+
+ // - version 0, max_version 1
+ // - Current version = 1
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 2
+ // - Current version = 1
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 0
+ // - Current version = 2
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 0),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade),
+
+ // - version 0, max_version 1
+ // - Current version = 2
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 2
+ // - Current version = 2
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 1, max_version 1
+ // - Current version = 1
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 1, max_version 2
+ // - Current version = 1
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 2, max_version 2
+ // - Current version = 1
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 2, max_version 3
+ // - Current version = 1
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 3),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 1, max_version 1
+ // - Current version = 2
+ // - Result: upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUpgrade),
+
+ // - version 1, max_version 2
+ // - Current version = 2
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 1, max_version 2
+ // - Current version = 3
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/3,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 1, max_version 3
+ // - Current version = 2
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 2, max_version 2
+ // - Current version = 2
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 2, max_version 3
+ // - Current version = 2
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 3, max_version 3
+ // - Current version = 2
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(3, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 3, max_version 4
+ // - Current version = 2
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(3, 4),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollBack)));
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesUndeterminedVersion) {
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/1),
+ IsTrue());
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesVersionZeroUpgrade) {
+ // 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 0), /*curr_version=*/1),
+ IsTrue());
+
+ // 0 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 0), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesVersionZeroRollForward) {
+ // (1 -> 0), 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 1), /*curr_version=*/1),
+ IsTrue());
+
+ // (1 -> 0), 0 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 1), /*curr_version=*/2),
+ IsTrue());
+
+ // (2 -> 0), 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 2), /*curr_version=*/1),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesRollBack) {
+ // 2 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 2), /*curr_version=*/1),
+ IsTrue());
+
+ // 3 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(3, 3), /*curr_version=*/1),
+ IsTrue());
+
+ // (3 -> 2), 2 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 3), /*curr_version=*/1),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesRollForward) {
+ // (2 -> 1), 1 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 2), /*curr_version=*/2),
+ IsTrue());
+
+ // (2 -> 1), 1 -> 3
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 2), /*curr_version=*/3),
+ IsTrue());
+
+ // (3 -> 1), 1 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 3), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesCompatible) {
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 2), /*curr_version=*/2),
+ IsFalse());
+
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 3), /*curr_version=*/2),
+ IsFalse());
+}
+
+TEST(VersionUtilTest, Upgrade) {
+ // Unlike other state changes, upgrade depends on the actual "encoded path".
+
+ // kVersionOne -> kVersionTwo
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
+ /*curr_version=*/kVersionTwo),
+ IsFalse());
+
+ // kVersionTwo -> kVersionThree
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionTwo, kVersionTwo),
+ /*curr_version=*/kVersionThree),
+ IsFalse());
+
+ // kVersionOne -> kVersionThree.
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
+ /*curr_version=*/kVersionThree),
+ IsFalse());
+}
+
+} // namespace
+
+} // namespace version_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc b/icing/icing-search-engine-test-jni-layer.cc
index 8392363..6acc99b 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc
+++ b/icing/icing-search-engine-test-jni-layer.cc
@@ -21,12 +21,11 @@
JNIEnv* g_jenv = nullptr;
extern "C" JNIEXPORT jboolean JNICALL
-Java_icing_tokenization_reverse_1jni_ReverseJniLanguageSegmenterTest_testsMain(
- JNIEnv* env, jclass ignored) {
+Java_icing_jni_IcingSearchEngineJniTest_testsMain(JNIEnv* env, jclass ignored) {
g_jenv = env;
std::vector<char*> my_argv;
- char arg[] = "reverse-jni-language-segmenter-test-lib";
+ char arg[] = "jni-test-lib";
my_argv.push_back(arg);
int argc = 1;
char** argv = &(my_argv[0]);
diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc
index 32ac9e6..39f9df0 100644
--- a/icing/icing-search-engine-with-icu-file_test.cc
+++ b/icing/icing-search-engine-with-icu-file_test.cc
@@ -27,11 +27,14 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
+
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::Eq;
@@ -54,22 +57,6 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
.Build();
}
-SchemaProto CreateMessageSchema() {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- return schema;
-}
-
ScoringSpecProto GetDefaultScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -79,15 +66,31 @@ ScoringSpecProto GetDefaultScoringSpec() {
TEST(IcingSearchEngineWithIcuFileTest, ShouldInitialize) {
IcingSearchEngine icing(GetDefaultIcingOptions());
EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
}
TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) {
IcingSearchEngine icing(GetDefaultIcingOptions());
ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
@@ -113,7 +116,8 @@ TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) {
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
} // namespace
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 75ccc41..72be4e9 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -18,6 +18,8 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -27,27 +29,57 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-file.h"
+#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/term-indexing-handler.h"
+#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/internal/optimize.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/optimize.pb.h"
#include "icing/proto/persist.pb.h"
#include "icing/proto/reset.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/query-features.h"
#include "icing/query/query-processor.h"
-#include "icing/result/result-retriever.h"
+#include "icing/query/query-results.h"
+#include "icing/query/suggestion-processor.h"
+#include "icing/result/page-result.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/projector.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-retriever-v2.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
-#include "icing/scoring/ranker.h"
+#include "icing/scoring/advanced_scoring/score-expression.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
#include "icing/scoring/scoring-processor.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
@@ -57,38 +89,97 @@
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
+constexpr std::string_view kVersionFilename = "version";
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
+constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
+constexpr std::string_view kQualifiedIdJoinIndexSubfolderName =
+ "qualified_id_join_index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
-constexpr std::string_view kIcingSearchEngineHeaderFilename =
- "icing_search_engine_header";
-
-libtextclassifier3::Status ValidateOptions(
- const IcingSearchEngineOptions& options) {
- // These options are only used in IndexProcessor, which won't be created
- // until the first Put call. So they must be checked here, so that any
- // errors can be surfaced in Initialize.
- if (options.max_tokens_per_doc() <= 0) {
- return absl_ports::InvalidArgumentError(
- "Options::max_tokens_per_doc must be greater than zero.");
+constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
+constexpr std::string_view kInitMarkerFilename = "init_marker";
+constexpr std::string_view kOptimizeStatusFilename = "optimize_status";
+
+// The maximum number of unsuccessful initialization attempts from the current
+// state that we will tolerate before deleting all data and starting from a
+// fresh state.
+constexpr int kMaxUnsuccessfulInitAttempts = 5;
+
+// A pair that holds namespace and type.
+struct NamespaceTypePair {
+ std::string namespace_;
+ std::string type;
+
+ bool operator==(const NamespaceTypePair& other) const {
+ return namespace_ == other.namespace_ && type == other.type;
}
- return libtextclassifier3::Status::OK;
-}
+};
+
+struct NamespaceTypePairHasher {
+ std::size_t operator()(const NamespaceTypePair& pair) const {
+ return std::hash<std::string>()(pair.namespace_) ^
+ std::hash<std::string>()(pair.type);
+ }
+};
libtextclassifier3::Status ValidateResultSpec(
- const ResultSpecProto& result_spec) {
+ const DocumentStore* document_store, const ResultSpecProto& result_spec) {
if (result_spec.num_per_page() < 0) {
return absl_ports::InvalidArgumentError(
"ResultSpecProto.num_per_page cannot be negative.");
}
+ if (result_spec.num_total_bytes_per_page_threshold() <= 0) {
+ return absl_ports::InvalidArgumentError(
+ "ResultSpecProto.num_total_bytes_per_page_threshold cannot be "
+ "non-positive.");
+ }
+ if (result_spec.max_joined_children_per_parent_to_return() < 0) {
+ return absl_ports::InvalidArgumentError(
+ "ResultSpecProto.max_joined_children_per_parent_to_return cannot be "
+ "negative.");
+ }
+ if (result_spec.num_to_score() <= 0) {
+ return absl_ports::InvalidArgumentError(
+ "ResultSpecProto.num_to_score cannot be non-positive.");
+ }
+ // Validate ResultGroupings.
+ std::unordered_set<int32_t> unique_entry_ids;
+ ResultSpecProto::ResultGroupingType result_grouping_type =
+ result_spec.result_group_type();
+ for (const ResultSpecProto::ResultGrouping& result_grouping :
+ result_spec.result_groupings()) {
+ if (result_grouping.max_results() <= 0) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot specify a result grouping with max results <= 0.");
+ }
+ for (const ResultSpecProto::ResultGrouping::Entry& entry :
+ result_grouping.entry_groupings()) {
+ const std::string& name_space = entry.namespace_();
+ const std::string& schema = entry.schema();
+ auto entry_id_or = document_store->GetResultGroupingEntryId(
+ result_grouping_type, name_space, schema);
+ if (!entry_id_or.ok()) {
+ continue;
+ }
+ int32_t entry_id = entry_id_or.ValueOrDie();
+ if (unique_entry_ids.find(entry_id) != unique_entry_ids.end()) {
+ return absl_ports::InvalidArgumentError(
+ "Entry Ids must be unique across result groups.");
+ }
+ unique_entry_ids.insert(entry_id);
+ }
+ }
return libtextclassifier3::Status::OK;
}
@@ -101,21 +192,71 @@ libtextclassifier3::Status ValidateSearchSpec(
"allowed query length: ",
std::to_string(configuration.max_query_length)));
}
+ // Check that no unknown features have been enabled in the search spec.
+ std::unordered_set<Feature> query_features_set = GetQueryFeaturesSet();
+ for (const Feature feature : search_spec.enabled_features()) {
+ if (query_features_set.find(feature) == query_features_set.end()) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unknown feature in "
+ "SearchSpecProto.enabled_features: ",
+ feature));
+ }
+ }
return libtextclassifier3::Status::OK;
}
-IndexProcessor::Options CreateIndexProcessorOptions(
- const IcingSearchEngineOptions& options) {
- IndexProcessor::Options index_processor_options;
- index_processor_options.max_tokens_per_document =
- options.max_tokens_per_doc();
- index_processor_options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kSuppressError;
- return index_processor_options;
+libtextclassifier3::Status ValidateSuggestionSpec(
+ const SuggestionSpecProto& suggestion_spec,
+ const PerformanceConfiguration& configuration) {
+ if (suggestion_spec.prefix().empty()) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("SuggestionSpecProto.prefix is empty!"));
+ }
+ if (suggestion_spec.scoring_spec().scoring_match_type() ==
+ TermMatchType::UNKNOWN) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("SuggestionSpecProto.term_match_type is unknown!"));
+ }
+ if (suggestion_spec.num_to_return() <= 0) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "SuggestionSpecProto.num_to_return must be positive."));
+ }
+ if (suggestion_spec.prefix().size() > configuration.max_query_length) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("SuggestionSpecProto.prefix is longer than the "
+ "maximum allowed prefix length: ",
+ std::to_string(configuration.max_query_length)));
+ }
+ return libtextclassifier3::Status::OK;
}
-std::string MakeHeaderFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kIcingSearchEngineHeaderFilename);
+bool IsV2QualifiedIdJoinIndexEnabled(const IcingSearchEngineOptions& options) {
+ return options.use_new_qualified_id_join_index() &&
+ options.document_store_namespace_id_fingerprint();
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+CreateQualifiedIdJoinIndex(const Filesystem& filesystem,
+ std::string qualified_id_join_index_dir,
+ const IcingSearchEngineOptions& options) {
+ if (IsV2QualifiedIdJoinIndexEnabled(options)) {
+ // V2
+ return QualifiedIdJoinIndexImplV2::Create(
+ filesystem, std::move(qualified_id_join_index_dir),
+ options.pre_mapping_fbv());
+ } else {
+ // V1
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ return QualifiedIdJoinIndexImplV1::Create(
+ filesystem, std::move(qualified_id_join_index_dir),
+ options.pre_mapping_fbv(), options.use_persistent_hash_map());
+ }
+}
+
+// Version file is a single file under base_dir containing version info of the
+// existing data.
+std::string MakeVersionFilePath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kVersionFilename);
}
// Document store files are in a standalone subfolder for easier file
@@ -139,6 +280,22 @@ std::string MakeIndexDirectoryPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kIndexSubfolderName);
}
+// Working path for integer index. Integer index is derived from
+// PersistentStorage and it will take full ownership of this working path,
+// including creation/deletion. See PersistentStorage for more details about
+// working path.
+std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName);
+}
+
+// Working path for qualified id join index. It is derived from
+// PersistentStorage and it will take full ownership of this working path,
+// including creation/deletion. See PersistentStorage for more details about
+// working path.
+std::string MakeQualifiedIdJoinIndexWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kQualifiedIdJoinIndexSubfolderName);
+}
+
// SchemaStore files are in a standalone subfolder for easier file management.
// We can delete and recreate the subfolder and not touch/affect anything
// else.
@@ -146,32 +303,45 @@ std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName);
}
+std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename);
+}
+
+std::string MakeInitMarkerFilePath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kInitMarkerFilename);
+}
+
void TransformStatus(const libtextclassifier3::Status& internal_status,
StatusProto* status_proto) {
+ StatusProto::Code code;
+ if (!internal_status.ok()) {
+ ICING_LOG(WARNING) << "Error: " << internal_status.error_code()
+ << ", Message: " << internal_status.error_message();
+ }
switch (internal_status.CanonicalCode()) {
case libtextclassifier3::StatusCode::OK:
- status_proto->set_code(StatusProto::OK);
+ code = StatusProto::OK;
break;
case libtextclassifier3::StatusCode::DATA_LOSS:
- status_proto->set_code(StatusProto::WARNING_DATA_LOSS);
+ code = StatusProto::WARNING_DATA_LOSS;
break;
case libtextclassifier3::StatusCode::INVALID_ARGUMENT:
- status_proto->set_code(StatusProto::INVALID_ARGUMENT);
+ code = StatusProto::INVALID_ARGUMENT;
break;
case libtextclassifier3::StatusCode::NOT_FOUND:
- status_proto->set_code(StatusProto::NOT_FOUND);
+ code = StatusProto::NOT_FOUND;
break;
case libtextclassifier3::StatusCode::FAILED_PRECONDITION:
- status_proto->set_code(StatusProto::FAILED_PRECONDITION);
+ code = StatusProto::FAILED_PRECONDITION;
break;
case libtextclassifier3::StatusCode::ABORTED:
- status_proto->set_code(StatusProto::ABORTED);
+ code = StatusProto::ABORTED;
break;
case libtextclassifier3::StatusCode::INTERNAL:
// TODO(b/147699081): Cleanup our internal use of INTERNAL since it
// doesn't match with what it *should* indicate as described in
// go/icing-library-apis.
- status_proto->set_code(StatusProto::INTERNAL);
+ code = StatusProto::INTERNAL;
break;
case libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED:
// TODO(b/147699081): Note that we don't detect all cases of OUT_OF_SPACE
@@ -179,43 +349,117 @@ void TransformStatus(const libtextclassifier3::Status& internal_status,
// internally to indicate other resources are exhausted (e.g.
// DocHitInfos) - although none of these are exposed through the API.
// Consider separating the two cases out more clearly.
- status_proto->set_code(StatusProto::OUT_OF_SPACE);
+ code = StatusProto::OUT_OF_SPACE;
break;
- default:
+ case libtextclassifier3::StatusCode::ALREADY_EXISTS:
+ code = StatusProto::ALREADY_EXISTS;
+ break;
+ case libtextclassifier3::StatusCode::CANCELLED:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::UNKNOWN:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::DEADLINE_EXCEEDED:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::PERMISSION_DENIED:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::OUT_OF_RANGE:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::UNIMPLEMENTED:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::UNAVAILABLE:
+ [[fallthrough]];
+ case libtextclassifier3::StatusCode::UNAUTHENTICATED:
// Other internal status codes aren't supported externally yet. If it
// should be supported, add another switch-case above.
- ICING_LOG(FATAL)
- << "Internal status code not supported in the external API";
+ ICING_LOG(ERROR) << "Internal status code "
+ << internal_status.error_code()
+ << " not supported in the external API";
+ code = StatusProto::UNKNOWN;
break;
}
-
+ status_proto->set_code(code);
status_proto->set_message(internal_status.error_message());
}
+libtextclassifier3::Status RetrieveAndAddDocumentInfo(
+ const DocumentStore* document_store, DeleteByQueryResultProto& result_proto,
+ std::unordered_map<NamespaceTypePair,
+ DeleteByQueryResultProto::DocumentGroupInfo*,
+ NamespaceTypePairHasher>& info_map,
+ DocumentId document_id) {
+ ICING_ASSIGN_OR_RETURN(DocumentProto document,
+ document_store->Get(document_id));
+ NamespaceTypePair key = {document.namespace_(), document.schema()};
+ auto iter = info_map.find(key);
+ if (iter == info_map.end()) {
+ auto entry = result_proto.add_deleted_documents();
+ entry->set_namespace_(std::move(document.namespace_()));
+ entry->set_schema(std::move(document.schema()));
+ entry->add_uris(std::move(document.uri()));
+ info_map[key] = entry;
+ } else {
+ iter->second->add_uris(std::move(document.uri()));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats,
+ float optimize_rebuild_index_threshold) {
+ int num_invalid_documents = optimize_stats.num_deleted_documents() +
+ optimize_stats.num_expired_documents();
+ return num_invalid_documents >= optimize_stats.num_original_documents() *
+ optimize_rebuild_index_threshold;
+}
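+// Example (illustrative): with 100 original documents, 10 deleted and 5
+// expired, and optimize_rebuild_index_threshold = 0.1, 15 >= 100 * 0.1
+// holds, so ShouldRebuildIndex returns true.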
+
+// Helper method to get the RankingStrategy when advanced scoring is enabled.
+// When the "RelevanceScore" function is used in the advanced scoring
+// expression, RankingStrategy is treated as RELEVANCE_SCORE in order to
+// prepare the information needed for calculating relevance scores.
+libtextclassifier3::StatusOr<ScoringSpecProto::RankingStrategy::Code>
+GetRankingStrategyFromScoringSpec(const ScoringSpecProto& scoring_spec) {
+ if (scoring_spec.advanced_scoring_expression().empty()) {
+ return scoring_spec.rank_by();
+ }
+ // TODO(b/261474063) The Lexer will be called again when creating the
+ // AdvancedScorer instance. Consider refactoring the code to allow the Lexer
+ // to be called only once.
+ Lexer lexer(scoring_spec.advanced_scoring_expression(),
+ Lexer::Language::SCORING);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ for (const Lexer::LexerToken& token : lexer_tokens) {
+ if (token.type == Lexer::TokenType::FUNCTION_NAME &&
+ token.text == RelevanceScoreFunctionScoreExpression::kFunctionName) {
+ return ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
+ }
+ }
+ return ScoringSpecProto::RankingStrategy::NONE;
+}
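+// Example (illustrative): an advanced scoring expression that calls the
+// relevance-score function (RelevanceScoreFunctionScoreExpression::
+// kFunctionName) resolves to RELEVANCE_SCORE, so the information needed to
+// calculate relevance scores is prepared; any other non-empty expression
+// resolves to NONE, since ranking is then driven entirely by the expression.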
+
} // namespace
IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options,
std::unique_ptr<const JniCache> jni_cache)
: IcingSearchEngine(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
std::make_unique<Clock>(), std::move(jni_cache)) {}
IcingSearchEngine::IcingSearchEngine(
IcingSearchEngineOptions options,
- std::unique_ptr<const Filesystem> filesystem, std::unique_ptr<Clock> clock,
- std::unique_ptr<const JniCache> jni_cache)
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock, std::unique_ptr<const JniCache> jni_cache)
: options_(std::move(options)),
filesystem_(std::move(filesystem)),
- icing_filesystem_(std::make_unique<IcingFilesystem>()),
+ icing_filesystem_(std::move(icing_filesystem)),
clock_(std::move(clock)),
- result_state_manager_(performance_configuration_.max_num_hits_per_query,
- performance_configuration_.max_num_cache_results),
jni_cache_(std::move(jni_cache)) {
ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir();
}
IcingSearchEngine::~IcingSearchEngine() {
if (initialized_) {
- if (PersistToDisk().status().code() != StatusProto::OK) {
+ if (PersistToDisk(PersistType::FULL).status().code() != StatusProto::OK) {
ICING_LOG(ERROR)
<< "Error persisting to disk in IcingSearchEngine destructor";
}
@@ -230,49 +474,164 @@ InitializeResultProto IcingSearchEngine::Initialize() {
return InternalInitialize();
}
+void IcingSearchEngine::ResetMembers() {
+ schema_store_.reset();
+ document_store_.reset();
+ language_segmenter_.reset();
+ normalizer_.reset();
+ index_.reset();
+ integer_index_.reset();
+ qualified_id_join_index_.reset();
+}
+
+libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
+ InitializeStatsProto* initialize_stats) {
+ // Check to see if the marker file exists and if we've already passed our max
+ // number of init attempts.
+ std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir());
+ bool file_exists = filesystem_->FileExists(marker_filepath.c_str());
+ int network_init_attempts = 0;
+ int host_init_attempts = 0;
+
+ // Read the number of previous failed init attempts from the file. If the
+ // read fails, just assume the value is zero (the most likely reason for
+ // failure is that the file doesn't exist because the last init was
+ // successful).
+ std::unique_ptr<ScopedFd> marker_file_fd = std::make_unique<ScopedFd>(
+ filesystem_->OpenForWrite(marker_filepath.c_str()));
+ libtextclassifier3::Status status;
+ if (file_exists &&
+ filesystem_->PRead(marker_file_fd->get(), &network_init_attempts,
+ sizeof(network_init_attempts), /*offset=*/0)) {
+ host_init_attempts = GNetworkToHostL(network_init_attempts);
+ if (host_init_attempts > kMaxUnsuccessfulInitAttempts) {
+ // We've tried and failed to init too many times. We need to throw
+ // everything out and start from scratch.
+ ResetMembers();
+ marker_file_fd.reset();
+
+ // Delete the entire base directory.
+ if (!filesystem_->DeleteDirectoryRecursively(
+ options_.base_dir().c_str())) {
+ return absl_ports::InternalError("Failed to delete icing base dir!");
+ }
+
+ // Create the base directory again and reopen marker file.
+ if (!filesystem_->CreateDirectoryRecursively(
+ options_.base_dir().c_str())) {
+ return absl_ports::InternalError("Failed to create icing base dir!");
+ }
+
+ marker_file_fd = std::make_unique<ScopedFd>(
+ filesystem_->OpenForWrite(marker_filepath.c_str()));
+
+ status = absl_ports::DataLossError(
+ "Encountered failed initialization limit. Cleared all data.");
+ host_init_attempts = 0;
+ }
+ }
+
+ // Use network_init_attempts here because we might have set host_init_attempts
+ // to 0 if it exceeded the max threshold.
+ initialize_stats->set_num_previous_init_failures(
+ GNetworkToHostL(network_init_attempts));
+
+ ++host_init_attempts;
+ network_init_attempts = GHostToNetworkL(host_init_attempts);
+ // Write the updated number of attempts before we get started.
+ if (!filesystem_->PWrite(marker_file_fd->get(), /*offset=*/0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)) ||
+ !filesystem_->DataSync(marker_file_fd->get())) {
+ return absl_ports::InternalError(
+ "Failed to write and sync init marker file");
+ }
+
+ return status;
+}
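+// Marker-file lifecycle note: the attempt counter is stored in network byte
+// order, incremented and synced before initialization proceeds, and the file
+// is deleted after a successful init (see InternalInitialize); a marker file
+// that survives therefore counts consecutive failed attempts.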
+
InitializeResultProto IcingSearchEngine::InternalInitialize() {
ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: "
<< options_.base_dir();
+ // Measure the latency of the initialization process.
+ std::unique_ptr<Timer> initialize_timer = clock_->GetNewTimer();
+
InitializeResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
+ InitializeStatsProto* initialize_stats =
+ result_proto.mutable_initialize_stats();
if (initialized_) {
// Already initialized.
result_status->set_code(StatusProto::OK);
+ initialize_stats->set_latency_ms(
+ initialize_timer->GetElapsedMilliseconds());
+ initialize_stats->set_num_documents(document_store_->num_documents());
return result_proto;
}
- // Releases result / query cache if any
- result_state_manager_.InvalidateAllResultStates();
+ // Now go ahead and try to initialize.
+ libtextclassifier3::Status status = InitializeMembers(initialize_stats);
+ if (status.ok() || absl_ports::IsDataLoss(status)) {
+ // We successfully initialized. We should delete the init marker file to
+ // indicate a successful init.
+ std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir());
+ if (!filesystem_->DeleteFile(marker_filepath.c_str())) {
+ status = absl_ports::InternalError("Failed to delete init marker file!");
+ } else {
+ initialized_ = true;
+ }
+ }
+ TransformStatus(status, result_status);
+ initialize_stats->set_latency_ms(initialize_timer->GetElapsedMilliseconds());
+ return result_proto;
+}
+
+libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
+ InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
- libtextclassifier3::Status status = InitializeMembers();
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
+ // Make sure the base directory exists
+ if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Could not create directory: ", options_.base_dir()));
}
- // Even if each subcomponent initialized fine independently, we need to
- // check if they're consistent with each other.
- if (!CheckConsistency().ok()) {
- ICING_VLOG(1)
- << "IcingSearchEngine in inconsistent state, regenerating all "
- "derived data";
- status = RegenerateDerivedFiles();
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
- }
+ // Check to see if the marker file exists and if we've already passed our max
+ // number of init attempts.
+ libtextclassifier3::Status status = CheckInitMarkerFile(initialize_stats);
+ if (!status.ok() && !absl_ports::IsDataLoss(status)) {
+ return status;
}
- initialized_ = true;
- result_status->set_code(StatusProto::OK);
- return result_proto;
-}
+ // Read version file and determine the state change.
+ const std::string version_filepath = MakeVersionFilePath(options_.base_dir());
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ ICING_ASSIGN_OR_RETURN(
+ version_util::VersionInfo version_info,
+ version_util::ReadVersion(*filesystem_, version_filepath, index_dir));
+ version_util::StateChange version_state_change =
+ version_util::GetVersionStateChange(version_info);
+ if (version_state_change != version_util::StateChange::kCompatible) {
+ // Step 1: migrate schema according to the version state change.
+ ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
+ filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
+ version_state_change, version_util::kVersion));
+
+ // Step 2: discard all derived data if needed rebuild.
+ if (version_util::ShouldRebuildDerivedFiles(version_info)) {
+ ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
+ }
+
+ // Step 3: update version file
+ version_util::VersionInfo new_version_info(
+ version_util::kVersion,
+ std::max(version_info.max_version, version_util::kVersion));
+ ICING_RETURN_IF_ERROR(version_util::WriteVersion(
+ *filesystem_, version_filepath, new_version_info));
+ }
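+ // Note: max_version is monotonically non-decreasing; it records the newest
+ // version that has ever touched this data, which lets a later init
+ // distinguish a roll-forward from a plain upgrade.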
-libtextclassifier3::Status IcingSearchEngine::InitializeMembers() {
- ICING_RETURN_IF_ERROR(InitializeOptions());
- ICING_RETURN_IF_ERROR(InitializeSchemaStore());
- ICING_RETURN_IF_ERROR(InitializeDocumentStore());
+ ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
// TODO(b/156383798) : Resolve how to specify the locale.
language_segmenter_factory::SegmenterOptions segmenter_options(
@@ -283,24 +642,156 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers() {
TC3_ASSIGN_OR_RETURN(normalizer_,
normalizer_factory::Create(options_.max_token_length()));
- ICING_RETURN_IF_ERROR(InitializeIndex());
+ std::string marker_filepath =
+ MakeSetSchemaMarkerFilePath(options_.base_dir());
+
+ libtextclassifier3::Status index_init_status;
+ if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
+ // The schema was either lost or never set before. Wipe out the doc store
+ // and index directories and initialize them from scratch.
+ const std::string doc_store_dir =
+ MakeDocumentDirectoryPath(options_.base_dir());
+ const std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ const std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
+ !filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
+ !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok() ||
+ !QualifiedIdJoinIndex::Discard(*filesystem_,
+ qualified_id_join_index_dir)
+ .ok()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Could not delete directories: ", index_dir, ", ", integer_index_dir,
+ ", ", qualified_id_join_index_dir, " and ", doc_store_dir));
+ }
+ ICING_ASSIGN_OR_RETURN(
+ bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
+ if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
+ return index_init_status;
+ }
+ } else if (filesystem_->FileExists(marker_filepath.c_str())) {
+ // If the marker file is still around then something wonky happened when we
+ // last tried to set the schema.
+ //
+ // Since we're going to rebuild all indices in this case, the return value
+ // of InitializeDocumentStore (document_store_derived_files_regenerated) is
+ // unused.
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
+
+ // We're going to need to rebuild the index from scratch, so discard the
+ // index directory and instantiate a new one.
+ Index::Options index_options(
+ index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
+ options_.build_property_existence_metadata_hits());
+ if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Could not recreate directory: ", index_dir));
+ }
+ ICING_ASSIGN_OR_RETURN(index_,
+ Index::Create(index_options, filesystem_.get(),
+ icing_filesystem_.get()));
- return libtextclassifier3::Status::OK;
-}
+ // Discard integer index directory and instantiate a new one.
+ std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ ICING_RETURN_IF_ERROR(
+ IntegerIndex::Discard(*filesystem_, integer_index_dir));
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_,
+ IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv()));
+
+ // Discard qualified id join index directory and instantiate a new one.
+ std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
+
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ index_init_status = std::move(restore_result.status);
+ // DATA_LOSS means that we have successfully initialized and re-added
+ // content to the index. Some indexed content was lost, but otherwise the
+ // index is in a valid state and can be queried.
+ if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
+ return index_init_status;
+ }
-libtextclassifier3::Status IcingSearchEngine::InitializeOptions() {
- ICING_RETURN_IF_ERROR(ValidateOptions(options_));
+ // Delete the marker file to indicate that everything is now in sync with
+ // whatever changes were made to the schema.
+ filesystem_->DeleteFile(marker_filepath.c_str());
+
+ initialize_stats->set_index_restoration_latency_ms(
+ restore_timer->GetElapsedMilliseconds());
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ } else if (version_state_change != version_util::StateChange::kCompatible) {
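+    // The on-disk data was written by a different, non-compatible version of
+    // Icing, so force document revalidation and attribute every recovery to
+    // the version change.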
+ ICING_ASSIGN_OR_RETURN(bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
+ if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
+ return index_init_status;
+ }
- // Make sure the base directory exists
- if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Could not create directory: ", options_.base_dir()));
+ initialize_stats->set_schema_store_recovery_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ bool document_store_derived_files_regenerated,
+ InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false,
+ initialize_stats));
+ index_init_status = InitializeIndex(
+ document_store_derived_files_regenerated, initialize_stats);
+ if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
+ return index_init_status;
+ }
}
- return libtextclassifier3::Status::OK;
+ if (status.ok()) {
+ status = index_init_status;
+ }
+
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
+
+ return status;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore() {
+libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
+ InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
+
const std::string schema_store_dir =
MakeSchemaDirectoryPath(options_.base_dir());
// Make sure the sub-directory exists
@@ -309,12 +800,17 @@ libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore() {
absl_ports::StrCat("Could not create directory: ", schema_store_dir));
}
ICING_ASSIGN_OR_RETURN(
- schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir));
+ schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir,
+ clock_.get(), initialize_stats));
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore() {
+libtextclassifier3::StatusOr<bool> IcingSearchEngine::InitializeDocumentStore(
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
+
const std::string document_dir =
MakeDocumentDirectoryPath(options_.base_dir());
// Make sure the sub-directory exists
@@ -323,23 +819,38 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore() {
absl_ports::StrCat("Could not create directory: ", document_dir));
}
ICING_ASSIGN_OR_RETURN(
- document_store_,
- DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
- schema_store_.get()));
-
- return libtextclassifier3::Status::OK;
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ filesystem_.get(), document_dir, clock_.get(), schema_store_.get(),
+ force_recovery_and_revalidate_documents,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), initialize_stats));
+ document_store_ = std::move(create_result.document_store);
+
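+  // Returning this flag lets InitializeIndex know whether document store
+  // derived files were regenerated; indices that depend on them (e.g. the v2
+  // qualified id join index) must then be rebuilt from scratch.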
+ return create_result.derived_files_regenerated;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeIndex() {
+libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
+ bool document_store_derived_files_regenerated,
+ InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
+
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
// Make sure the sub-directory exists
if (!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
return absl_ports::InternalError(
absl_ports::StrCat("Could not create directory: ", index_dir));
}
- Index::Options index_options(index_dir, options_.index_merge_size());
+ Index::Options index_options(
+ index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
+ options_.build_property_existence_metadata_hits());
- auto index_or = Index::Create(index_options, icing_filesystem_.get());
+ // Term index
+ InitializeStatsProto::RecoveryCause index_recovery_cause;
+ auto index_or =
+ Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
if (!index_or.ok()) {
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
@@ -347,66 +858,114 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex() {
absl_ports::StrCat("Could not recreate directory: ", index_dir));
}
+ index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
// Try recreating it from scratch and re-indexing everything.
- ICING_ASSIGN_OR_RETURN(
- index_, Index::Create(index_options, icing_filesystem_.get()));
- ICING_RETURN_IF_ERROR(RestoreIndex());
+ ICING_ASSIGN_OR_RETURN(index_,
+ Index::Create(index_options, filesystem_.get(),
+ icing_filesystem_.get()));
} else {
// Index was created fine.
index_ = std::move(index_or).ValueOrDie();
- }
-
- return libtextclassifier3::Status::OK;
-} // namespace lib
+    // If a recovery does have to happen, then it must be because the index is
+    // out of sync with the document store.
+ index_recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
+
+ // Integer index
+ std::string integer_index_dir =
+ MakeIntegerIndexWorkingPath(options_.base_dir());
+ InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
+ auto integer_index_or =
+ IntegerIndex::Create(*filesystem_, integer_index_dir,
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv());
+ if (!integer_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(
+ IntegerIndex::Discard(*filesystem_, integer_index_dir));
-libtextclassifier3::Status IcingSearchEngine::CheckConsistency() {
- if (!HeaderExists()) {
- // Without a header file, we have no checksum and can't even detect
- // inconsistencies
- return absl_ports::NotFoundError("No header file found.");
- }
+ integer_index_recovery_cause = InitializeStatsProto::IO_ERROR;
- // Header does exist, verify that the header looks fine.
- IcingSearchEngine::Header header;
- if (!filesystem_->Read(MakeHeaderFilename(options_.base_dir()).c_str(),
- &header, sizeof(header))) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Couldn't read: ", MakeHeaderFilename(options_.base_dir())));
- }
+ // Try recreating it from scratch and re-indexing everything.
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_,
+ IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv()));
+ } else {
+ // Integer index was created fine.
+ integer_index_ = std::move(integer_index_or).ValueOrDie();
+    // If a recovery does have to happen, then it must be because the index is
+    // out of sync with the document store.
+ integer_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
+
+ // Qualified id join index
+ std::string qualified_id_join_index_dir =
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
+ InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
+ if (document_store_derived_files_regenerated &&
+ IsV2QualifiedIdJoinIndexEnabled(options_)) {
+ // V2 qualified id join index depends on document store derived files, so we
+ // have to rebuild it from scratch if
+ // document_store_derived_files_regenerated is true.
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
- if (header.magic != IcingSearchEngine::Header::kMagic) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Invalid header kMagic for file: ",
- MakeHeaderFilename(options_.base_dir())));
- }
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- if (checksum.Get() != header.checksum) {
- return absl_ports::InternalError(
- "IcingSearchEngine checksum doesn't match");
+ qualified_id_join_index_recovery_cause =
+ InitializeStatsProto::DEPENDENCIES_CHANGED;
+ } else {
+ auto qualified_id_join_index_or = CreateQualifiedIdJoinIndex(
+ *filesystem_, qualified_id_join_index_dir, options_);
+ if (!qualified_id_join_index_or.ok()) {
+ ICING_RETURN_IF_ERROR(QualifiedIdJoinIndex::Discard(
+ *filesystem_, qualified_id_join_index_dir));
+
+ qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
+
+      // Try recreating it from scratch and rebuilding everything.
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex(
+ *filesystem_, std::move(qualified_id_join_index_dir), options_));
+ } else {
+ // Qualified id join index was created fine.
+ qualified_id_join_index_ =
+ std::move(qualified_id_join_index_or).ValueOrDie();
+      // If a recovery does have to happen, then it must be because the index
+      // is out of sync with the document store.
+ qualified_id_join_index_recovery_cause =
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
+ }
}
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles() {
- ICING_RETURN_IF_ERROR(
- document_store_->UpdateSchemaStore(schema_store_.get()));
- ICING_RETURN_IF_ERROR(index_->Reset());
- ICING_RETURN_IF_ERROR(RestoreIndex());
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ if (restore_result.index_needed_restoration ||
+ restore_result.integer_index_needed_restoration ||
+ restore_result.qualified_id_join_index_needed_restoration) {
+ initialize_stats->set_index_restoration_latency_ms(
+ restore_timer->GetElapsedMilliseconds());
- const std::string header_file =
- MakeHeaderFilename(options_.base_dir().c_str());
- if (HeaderExists()) {
- if (!filesystem_->DeleteFile(header_file.c_str())) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Unable to delete file: ", header_file));
+ if (restore_result.index_needed_restoration) {
+ initialize_stats->set_index_restoration_cause(index_recovery_cause);
+ }
+ if (restore_result.integer_index_needed_restoration) {
+ initialize_stats->set_integer_index_restoration_cause(
+ integer_index_recovery_cause);
+ }
+ if (restore_result.qualified_id_join_index_needed_restoration) {
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ qualified_id_join_index_recovery_cause);
}
}
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+ return restore_result.status;
}
SetSchemaResultProto IcingSearchEngine::SetSchema(
@@ -422,18 +981,15 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
StatusProto* result_status = result_proto.mutable_status();
absl_ports::unique_lock l(&mutex_);
+ ScopedTimer timer(clock_->GetNewTimer(), [&result_proto](int64_t t) {
+ result_proto.set_latency_ms(t);
+ });
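+  // ScopedTimer invokes the callback with the elapsed milliseconds when it
+  // goes out of scope, so latency_ms is recorded on every return path.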
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
return result_proto;
}
- libtextclassifier3::Status status = SchemaUtil::Validate(new_schema);
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
- }
-
auto lost_previous_schema_or = LostPreviousSchema();
if (!lost_previous_schema_or.ok()) {
TransformStatus(lost_previous_schema_or.status(), result_status);
@@ -441,14 +997,24 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
}
bool lost_previous_schema = lost_previous_schema_or.ValueOrDie();
+ std::string marker_filepath =
+ MakeSetSchemaMarkerFilePath(options_.base_dir());
+  // Create the marker file indicating that we are going to apply a schema
+  // change. No need to write anything to the marker file - its existence is
+  // the only thing that matters. The marker file signals that a crash or power
+  // loss occurred while the schema and derived files were being updated, so it
+  // is set up to be deleted whenever we return from this function.
+ DestructibleFile marker_file(marker_filepath, filesystem_.get());
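+  // If the process dies before marker_file's destructor runs, the leftover
+  // file is detected by the FileExists(marker_filepath) check during
+  // initialization, which forces a full index rebuild.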
+
auto set_schema_result_or = schema_store_->SetSchema(
- std::move(new_schema), ignore_errors_and_delete_documents);
+ std::move(new_schema), ignore_errors_and_delete_documents,
+ options_.allow_circular_schema_definitions());
if (!set_schema_result_or.ok()) {
TransformStatus(set_schema_result_or.status(), result_status);
return result_proto;
}
- const SchemaStore::SetSchemaResult set_schema_result =
- set_schema_result_or.ValueOrDie();
+ SchemaStore::SetSchemaResult set_schema_result =
+ std::move(set_schema_result_or).ValueOrDie();
for (const std::string& deleted_type :
set_schema_result.schema_types_deleted_by_name) {
@@ -460,6 +1026,34 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
result_proto.add_incompatible_schema_types(incompatible_type);
}
+ for (const std::string& new_type :
+ set_schema_result.schema_types_new_by_name) {
+ result_proto.add_new_schema_types(std::move(new_type));
+ }
+
+ for (const std::string& compatible_type :
+ set_schema_result.schema_types_changed_fully_compatible_by_name) {
+ result_proto.add_fully_compatible_changed_schema_types(
+ std::move(compatible_type));
+ }
+
+ bool index_incompatible =
+ !set_schema_result.schema_types_index_incompatible_by_name.empty();
+ for (const std::string& index_incompatible_type :
+ set_schema_result.schema_types_index_incompatible_by_name) {
+ result_proto.add_index_incompatible_changed_schema_types(
+ std::move(index_incompatible_type));
+ }
+
+ bool join_incompatible =
+ !set_schema_result.schema_types_join_incompatible_by_name.empty();
+ for (const std::string& join_incompatible_type :
+ set_schema_result.schema_types_join_incompatible_by_name) {
+ result_proto.add_join_incompatible_changed_schema_types(
+ std::move(join_incompatible_type));
+ }
+
+ libtextclassifier3::Status status;
if (set_schema_result.success) {
if (lost_previous_schema) {
// No previous schema to calculate a diff against. We have to go through
@@ -480,26 +1074,42 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
}
}
- if (lost_previous_schema || set_schema_result.index_incompatible) {
- // Clears all index files
- status = index_->Reset();
+ if (lost_previous_schema || index_incompatible) {
+ // Clears search indices
+ status = ClearSearchIndices();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
+ }
- status = RestoreIndex();
+ if (lost_previous_schema || join_incompatible) {
+ // Clears join indices
+ status = ClearJoinIndices();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
}
+ if (lost_previous_schema || index_incompatible || join_incompatible) {
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ // DATA_LOSS means that we have successfully re-added content to the
+ // index. Some indexed content was lost, but otherwise the index is in a
+ // valid state and can be queried.
+ if (!restore_result.status.ok() &&
+ !absl_ports::IsDataLoss(restore_result.status)) {
+      TransformStatus(restore_result.status, result_status);
+ return result_proto;
+ }
+ }
+
result_status->set_code(StatusProto::OK);
} else {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("Schema is incompatible.");
}
+
return result_proto;
}
@@ -557,6 +1167,11 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
PutResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
+ PutDocumentStatsProto* put_document_stats =
+ result_proto.mutable_put_document_stats();
+ ScopedTimer put_timer(clock_->GetNewTimer(), [put_document_stats](int64_t t) {
+ put_document_stats->set_latency_ms(t);
+ });
// Lock must be acquired before validation because the DocumentStore uses
// the schema file to validate, and the schema could be changed in
@@ -568,35 +1183,75 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
return result_proto;
}
- auto document_id_or = document_store_->Put(document);
+ auto tokenized_document_or = TokenizedDocument::Create(
+ schema_store_.get(), language_segmenter_.get(), std::move(document));
+ if (!tokenized_document_or.ok()) {
+ TransformStatus(tokenized_document_or.status(), result_status);
+ return result_proto;
+ }
+ TokenizedDocument tokenized_document(
+ std::move(tokenized_document_or).ValueOrDie());
+
+ auto document_id_or = document_store_->Put(
+ tokenized_document.document(), tokenized_document.num_string_tokens(),
+ put_document_stats);
if (!document_id_or.ok()) {
TransformStatus(document_id_or.status(), result_status);
return result_proto;
}
DocumentId document_id = document_id_or.ValueOrDie();
- auto index_processor_or = IndexProcessor::Create(
- schema_store_.get(), language_segmenter_.get(), normalizer_.get(),
- index_.get(), CreateIndexProcessorOptions(options_));
- if (!index_processor_or.ok()) {
- TransformStatus(index_processor_or.status(), result_status);
+ auto data_indexing_handlers_or = CreateDataIndexingHandlers();
+ if (!data_indexing_handlers_or.ok()) {
+ TransformStatus(data_indexing_handlers_or.status(), result_status);
return result_proto;
}
- std::unique_ptr<IndexProcessor> index_processor =
- std::move(index_processor_or).ValueOrDie();
+ IndexProcessor index_processor(
+ std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get());
+
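+  // The handlers are expected to cover each data index (term, integer,
+  // qualified id join); IndexProcessor::IndexDocument runs the tokenized
+  // document through all of them.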
+ auto index_status = index_processor.IndexDocument(
+ tokenized_document, document_id, put_document_stats);
+  // Getting an internal error from the index could mean that the index is
+  // broken. Try to rebuild the indices to recover.
+ if (absl_ports::IsInternal(index_status)) {
+ ICING_LOG(ERROR) << "Got an internal error from the index. Trying to "
+ "rebuild the index!\n"
+ << index_status.error_message();
+ index_status = ClearAllIndices();
+ if (index_status.ok()) {
+ index_status = RestoreIndexIfNeeded().status;
+ if (!index_status.ok()) {
+ ICING_LOG(ERROR) << "Failed to reindex documents after a failure of "
+ "indexing a document.";
+ }
+ } else {
+ ICING_LOG(ERROR)
+ << "Failed to clear indices after a failure of indexing a document.";
+ }
+ }
- auto status = index_processor->IndexDocument(document, document_id);
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
+ if (!index_status.ok()) {
+    // If indexing this document failed, or the internal error above could not
+    // be resolved, mark the document as deleted.
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ libtextclassifier3::Status delete_status =
+ document_store_->Delete(document_id, current_time_ms);
+ if (!delete_status.ok()) {
+      // This is pretty dire (and, hopefully, unlikely). We can't roll back the
+      // document that we just added. Wipe out the whole index.
+      ICING_LOG(ERROR) << "Cannot delete the document that failed to index. "
+                          "Wiping out the whole Icing search engine.";
+ ResetInternal();
+ }
}
- result_status->set_code(StatusProto::OK);
+ TransformStatus(index_status, result_status);
return result_proto;
}
GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
- const std::string_view uri) {
+ const std::string_view uri,
+ const GetResultSpecProto& result_spec) {
GetResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
@@ -613,8 +1268,48 @@ GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
return result_proto;
}
+ DocumentProto document = std::move(document_or).ValueOrDie();
+ std::unique_ptr<ProjectionTree> type_projection_tree;
+ std::unique_ptr<ProjectionTree> wildcard_projection_tree;
+ for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
+ schema_store_->ExpandTypePropertyMasks(
+ result_spec.type_property_masks())) {
+ if (type_field_mask.schema_type == document.schema()) {
+ type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask);
+ } else if (type_field_mask.schema_type ==
+ SchemaStore::kSchemaTypeWildcard) {
+ wildcard_projection_tree =
+ std::make_unique<ProjectionTree>(type_field_mask);
+ }
+ }
+
+ // Apply projection
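+  // A mask that names the document's exact schema type takes precedence over
+  // the wildcard mask.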
+ if (type_projection_tree != nullptr) {
+ projector::Project(type_projection_tree->root().children, &document);
+ } else if (wildcard_projection_tree != nullptr) {
+ projector::Project(wildcard_projection_tree->root().children, &document);
+ }
+
result_status->set_code(StatusProto::OK);
- *result_proto.mutable_document() = std::move(document_or).ValueOrDie();
+ *result_proto.mutable_document() = std::move(document);
+ return result_proto;
+}
+
+ReportUsageResultProto IcingSearchEngine::ReportUsage(
+ const UsageReport& usage_report) {
+ ReportUsageResultProto result_proto;
+ StatusProto* result_status = result_proto.mutable_status();
+
+ absl_ports::unique_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
+
+ libtextclassifier3::Status status =
+ document_store_->ReportUsage(usage_report);
+ TransformStatus(status, result_status);
return result_proto;
}
@@ -623,6 +1318,11 @@ GetAllNamespacesResultProto IcingSearchEngine::GetAllNamespaces() {
StatusProto* result_status = result_proto.mutable_status();
absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
std::vector<std::string> namespaces = document_store_->GetAllNamespaces();
@@ -648,18 +1348,30 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space,
return result_proto;
}
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = document_store_->Delete(name_space, uri);
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ libtextclassifier3::Status status =
+ document_store_->Delete(name_space, uri, current_time_ms);
if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete Document. namespace: " << name_space
- << ", uri: " << uri;
+ LogSeverity::Code severity = ERROR;
+ if (absl_ports::IsNotFound(status)) {
+ severity = DBG;
+ }
+ ICING_LOG(severity) << status.error_message()
+ << "Failed to delete Document. namespace: "
+ << name_space << ", uri: " << uri;
TransformStatus(status, result_status);
return result_proto;
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(1);
return result_proto;
}
@@ -676,16 +1388,24 @@ DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace(
return delete_result;
}
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status =
+ DocumentStore::DeleteByGroupResult doc_store_result =
document_store_->DeleteByNamespace(name_space);
- TransformStatus(status, result_status);
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ if (!doc_store_result.status.ok()) {
+ ICING_LOG(ERROR) << doc_store_result.status.error_message()
<< "Failed to delete Namespace: " << name_space;
+ TransformStatus(doc_store_result.status, result_status);
return delete_result;
}
+
+ result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
return delete_result;
}
@@ -702,20 +1422,143 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType(
return delete_result;
}
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status =
+ DocumentStore::DeleteByGroupResult doc_store_result =
document_store_->DeleteBySchemaType(schema_type);
- TransformStatus(status, result_status);
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ if (!doc_store_result.status.ok()) {
+ ICING_LOG(ERROR) << doc_store_result.status.error_message()
<< "Failed to delete SchemaType: " << schema_type;
+ TransformStatus(doc_store_result.status, result_status);
return delete_result;
}
+
+ result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
return delete_result;
}
-PersistToDiskResultProto IcingSearchEngine::PersistToDisk() {
+DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
+ const SearchSpecProto& search_spec, bool return_deleted_document_info) {
+ ICING_VLOG(1) << "Deleting documents for query " << search_spec.query()
+ << " from doc store";
+
+ DeleteByQueryResultProto result_proto;
+ StatusProto* result_status = result_proto.mutable_status();
+
+ absl_ports::unique_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
+
+ DeleteByQueryStatsProto* delete_stats =
+ result_proto.mutable_delete_by_query_stats();
+ delete_stats->set_query_length(search_spec.query().length());
+ delete_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ delete_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+
+ ScopedTimer delete_timer(clock_->GetNewTimer(), [delete_stats](int64_t t) {
+ delete_stats->set_latency_ms(t);
+ });
+ libtextclassifier3::Status status =
+ ValidateSearchSpec(search_spec, performance_configuration_);
+ if (!status.ok()) {
+ TransformStatus(status, result_status);
+ return result_proto;
+ }
+
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
+ // Gets unordered results from query processor
+ auto query_processor_or = QueryProcessor::Create(
+ index_.get(), integer_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get());
+ if (!query_processor_or.ok()) {
+ TransformStatus(query_processor_or.status(), result_status);
+ delete_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return result_proto;
+ }
+ std::unique_ptr<QueryProcessor> query_processor =
+ std::move(query_processor_or).ValueOrDie();
+
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
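+  // Deleted documents don't need ranking, so parse with
+  // ScoringSpecProto::RankingStrategy::NONE and skip any scoring work.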
+ auto query_results_or = query_processor->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE, current_time_ms);
+ if (!query_results_or.ok()) {
+ TransformStatus(query_results_or.status(), result_status);
+ delete_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return result_proto;
+ }
+ QueryResults query_results = std::move(query_results_or).ValueOrDie();
+ delete_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+
+ ICING_VLOG(2) << "Deleting the docs that matched the query.";
+ int num_deleted = 0;
+ // A map used to group deleted documents.
+ // From the (namespace, type) pair to a list of uris.
+ std::unordered_map<NamespaceTypePair,
+ DeleteByQueryResultProto::DocumentGroupInfo*,
+ NamespaceTypePairHasher>
+ deleted_info_map;
+
+ component_timer = clock_->GetNewTimer();
+ while (query_results.root_iterator->Advance().ok()) {
+ ICING_VLOG(3) << "Deleting doc "
+ << query_results.root_iterator->doc_hit_info().document_id();
+ ++num_deleted;
+ if (return_deleted_document_info) {
+ status = RetrieveAndAddDocumentInfo(
+ document_store_.get(), result_proto, deleted_info_map,
+ query_results.root_iterator->doc_hit_info().document_id());
+ if (!status.ok()) {
+ TransformStatus(status, result_status);
+ delete_stats->set_document_removal_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return result_proto;
+ }
+ }
+ status = document_store_->Delete(
+ query_results.root_iterator->doc_hit_info().document_id(),
+ current_time_ms);
+ if (!status.ok()) {
+ TransformStatus(status, result_status);
+ delete_stats->set_document_removal_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return result_proto;
+ }
+ }
+ delete_stats->set_document_removal_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ delete_stats->set_num_terms(term_count);
+
+ if (num_deleted > 0) {
+ result_proto.mutable_status()->set_code(StatusProto::OK);
+ } else {
+ result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ result_proto.mutable_status()->set_message(
+ "No documents matched the query to delete by!");
+ }
+ delete_stats->set_num_documents_deleted(num_deleted);
+ return result_proto;
+}
+
+PersistToDiskResultProto IcingSearchEngine::PersistToDisk(
+ PersistType::Code persist_type) {
ICING_VLOG(1) << "Persisting data to disk";
PersistToDiskResultProto result_proto;
@@ -728,7 +1571,7 @@ PersistToDiskResultProto IcingSearchEngine::PersistToDisk() {
return result_proto;
}
- auto status = InternalPersistToDisk();
+ auto status = InternalPersistToDisk(persist_type);
TransformStatus(status, result_status);
return result_proto;
}
@@ -752,53 +1595,168 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
return result_proto;
}
- // Releases result / query cache if any
- result_state_manager_.InvalidateAllResultStates();
+ OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats();
+ ScopedTimer optimize_timer(
+ clock_->GetNewTimer(),
+ [optimize_stats](int64_t t) { optimize_stats->set_latency_ms(t); });
// Flushes data to disk before doing optimization
- auto status = InternalPersistToDisk();
+ auto status = InternalPersistToDisk(PersistType::FULL);
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
+ int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ optimize_stats->set_storage_size_before(
+ Filesystem::SanitizeFileSize(before_size));
+
// TODO(b/143646633): figure out if we need to optimize index and doc store
// at the same time.
- libtextclassifier3::Status optimization_status = OptimizeDocumentStore();
-
- if (!optimization_status.ok() &&
- !absl_ports::IsDataLoss(optimization_status)) {
+ std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ optimize_result_or = OptimizeDocumentStore(optimize_stats);
+ optimize_stats->set_document_store_optimize_latency_ms(
+ optimize_doc_store_timer->GetElapsedMilliseconds());
+
+ if (!optimize_result_or.ok() &&
+ !absl_ports::IsDataLoss(optimize_result_or.status())) {
// The status now is either ABORTED_ERROR or INTERNAL_ERROR.
// If ABORTED_ERROR, Icing should still be working.
// If INTERNAL_ERROR, we're having IO errors or other errors that we can't
// recover from.
- TransformStatus(optimization_status, result_status);
+ TransformStatus(optimize_result_or.status(), result_status);
return result_proto;
}
   // The status is either OK or DATA_LOSS. The optimized document store is
   // guaranteed to work, so we update the indices according to the new
   // document store.
- libtextclassifier3::Status index_reset_status = index_->Reset();
- if (!index_reset_status.ok()) {
- status = absl_ports::Annotate(
- absl_ports::InternalError("Failed to reset index after optimization."),
- index_reset_status.error_message());
- TransformStatus(status, result_status);
- return result_proto;
+ std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
+ auto doc_store_optimize_result_status = optimize_result_or.status();
+ bool should_rebuild_index =
+ !optimize_result_or.ok() ||
+ optimize_result_or.ValueOrDie().should_rebuild_index ||
+ ShouldRebuildIndex(*optimize_stats,
+ options_.optimize_rebuild_index_threshold());
+ if (!should_rebuild_index) {
+ // At this point should_rebuild_index is false, so it means
+ // optimize_result_or.ok() is true and therefore it is safe to call
+ // ValueOrDie.
+ DocumentStore::OptimizeResult optimize_result =
+ std::move(optimize_result_or).ValueOrDie();
+
+ optimize_stats->set_index_restoration_mode(
+ OptimizeStatsProto::INDEX_TRANSLATION);
+ libtextclassifier3::Status index_optimize_status =
+ index_->Optimize(optimize_result.document_id_old_to_new,
+ document_store_->last_added_document_id());
+ if (!index_optimize_status.ok()) {
+ ICING_LOG(WARNING) << "Failed to optimize index. Error: "
+ << index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
+
+ libtextclassifier3::Status integer_index_optimize_status =
+ integer_index_->Optimize(optimize_result.document_id_old_to_new,
+ document_store_->last_added_document_id());
+ if (!integer_index_optimize_status.ok()) {
+ ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
+ << integer_index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
+
+ libtextclassifier3::Status qualified_id_join_index_optimize_status =
+ qualified_id_join_index_->Optimize(
+ optimize_result.document_id_old_to_new,
+ optimize_result.namespace_id_old_to_new,
+ document_store_->last_added_document_id());
+ if (!qualified_id_join_index_optimize_status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to optimize qualified id join index. Error: "
+ << qualified_id_join_index_optimize_status.error_message();
+ should_rebuild_index = true;
+ }
}
+ // If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
+ // valid document store, but it might be the old one or the new one. So throw
+ // out the index data and rebuild from scratch.
+  // Also rebuild the index if DocumentStore::OptimizeInto suggests doing so.
+ // Likewise, if Index::Optimize failed, then attempt to recover the index by
+ // rebuilding from scratch.
+ // If ShouldRebuildIndex() returns true, we will also rebuild the index for
+ // better performance.
+ if (should_rebuild_index) {
+ optimize_stats->set_index_restoration_mode(
+ OptimizeStatsProto::FULL_INDEX_REBUILD);
+ ICING_LOG(WARNING) << "Clearing the entire index!";
+
+ libtextclassifier3::Status index_clear_status = ClearAllIndices();
+ if (!index_clear_status.ok()) {
+ status = absl_ports::Annotate(
+ absl_ports::InternalError("Failed to clear index."),
+ index_clear_status.error_message());
+ TransformStatus(status, result_status);
+ optimize_stats->set_index_restoration_latency_ms(
+ optimize_index_timer->GetElapsedMilliseconds());
+ return result_proto;
+ }
- libtextclassifier3::Status index_restoration_status = RestoreIndex();
- if (!index_restoration_status.ok()) {
- status = absl_ports::Annotate(
- absl_ports::InternalError(
- "Failed to reindex documents after optimization."),
- index_restoration_status.error_message());
+ IndexRestorationResult index_restoration_status = RestoreIndexIfNeeded();
+ // DATA_LOSS means that we have successfully re-added content to the index.
+ // Some indexed content was lost, but otherwise the index is in a valid
+ // state and can be queried.
+ if (!index_restoration_status.status.ok() &&
+ !absl_ports::IsDataLoss(index_restoration_status.status)) {
+ status = absl_ports::Annotate(
+ absl_ports::InternalError(
+ "Failed to reindex documents after optimization."),
+ index_restoration_status.status.error_message());
+ TransformStatus(status, result_status);
+ optimize_stats->set_index_restoration_latency_ms(
+ optimize_index_timer->GetElapsedMilliseconds());
+ return result_proto;
+ }
+ }
+ optimize_stats->set_index_restoration_latency_ms(
+ optimize_index_timer->GetElapsedMilliseconds());
+
+ // Read the optimize status to get the time that we last ran.
+ std::string optimize_status_filename =
+ absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
+ FileBackedProto<OptimizeStatusProto> optimize_status_file(
+ *filesystem_, optimize_status_filename);
+ auto optimize_status_or = optimize_status_file.Read();
+ int64_t current_time = clock_->GetSystemTimeMilliseconds();
+ if (optimize_status_or.ok()) {
+    // Only set this field when the status file could be read; if reading
+    // failed or this is the first run ever, there is no previous run to
+    // report.
+ optimize_stats->set_time_since_last_optimize_ms(
+ current_time - optimize_status_or.ValueOrDie()
+ ->last_successful_optimize_run_time_ms());
+ }
+
+ // Update the status for this run and write it.
+ auto optimize_status = std::make_unique<OptimizeStatusProto>();
+ optimize_status->set_last_successful_optimize_run_time_ms(current_time);
+ auto write_status = optimize_status_file.Write(std::move(optimize_status));
+ if (!write_status.ok()) {
+ ICING_LOG(ERROR) << "Failed to write optimize status:\n"
+ << write_status.error_message();
+ }
+
+ // Flushes data to disk after doing optimization
+ status = InternalPersistToDisk(PersistType::FULL);
+ if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
}
- TransformStatus(optimization_status, result_status);
+ int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ optimize_stats->set_storage_size_after(
+ Filesystem::SanitizeFileSize(after_size));
+
+ TransformStatus(doc_store_optimize_result_status, result_status);
return result_proto;
}
@@ -815,6 +1773,22 @@ GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() {
return result_proto;
}
+ // Read the optimize status to get the time that we last ran.
+ std::string optimize_status_filename =
+ absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
+ FileBackedProto<OptimizeStatusProto> optimize_status_file(
+ *filesystem_, optimize_status_filename);
+ auto optimize_status_or = optimize_status_file.Read();
+ int64_t current_time = clock_->GetSystemTimeMilliseconds();
+
+ if (optimize_status_or.ok()) {
+    // Only set this field when the status file could be read; if reading
+    // failed or this is the first run ever, there is no previous run to
+    // report.
+ result_proto.set_time_since_last_optimize_ms(
+ current_time - optimize_status_or.ValueOrDie()
+ ->last_successful_optimize_run_time_ms());
+ }
+
// Get stats from DocumentStore
auto doc_store_optimize_info_or = document_store_->GetOptimizeInfo();
if (!doc_store_optimize_info_or.ok()) {
@@ -840,6 +1814,8 @@ GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() {
}
int64_t index_elements_size = index_elements_size_or.ValueOrDie();
+ // TODO(b/259744228): add stats for integer index
+
// Sum up the optimizable sizes from DocumentStore and Index
result_proto.set_estimated_optimizable_bytes(
index_elements_size * doc_store_optimize_info.optimizable_docs /
@@ -850,94 +1826,162 @@ GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() {
return result_proto;
}
-libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk() {
- ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
- ICING_RETURN_IF_ERROR(document_store_->PersistToDisk());
- ICING_RETURN_IF_ERROR(index_->PersistToDisk());
-
- // Update the combined checksum and write to header file.
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+StorageInfoResultProto IcingSearchEngine::GetStorageInfo() {
+ StorageInfoResultProto result;
+ absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ result.mutable_status()->set_message(
+ "IcingSearchEngine has not been initialized!");
+ return result;
+ }
+
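+  // GetDiskUsage over the base directory covers all of Icing's storage
+  // (document store, schema store, and all indices), not just the term index.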
+ int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ result.mutable_storage_info()->set_total_storage_size(
+ Filesystem::SanitizeFileSize(index_size));
+ *result.mutable_storage_info()->mutable_document_storage_info() =
+ document_store_->GetStorageInfo();
+ *result.mutable_storage_info()->mutable_schema_store_storage_info() =
+ schema_store_->GetStorageInfo();
+ *result.mutable_storage_info()->mutable_index_storage_info() =
+ index_->GetStorageInfo();
+ // TODO(b/259744228): add stats for integer index
+ result.mutable_status()->set_code(StatusProto::OK);
+ return result;
}
-libtextclassifier3::StatusOr<Crc32> IcingSearchEngine::ComputeChecksum() {
- Crc32 total_checksum;
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
- // that can support error logging.
- auto checksum_or = schema_store_->ComputeChecksum();
- if (!checksum_or.ok()) {
- ICING_LOG(ERROR) << checksum_or.status().error_message()
- << "Failed to compute checksum of SchemaStore";
- return checksum_or.status();
+DebugInfoResultProto IcingSearchEngine::GetDebugInfo(
+ DebugInfoVerbosity::Code verbosity) {
+ DebugInfoResultProto debug_info;
+ StatusProto* result_status = debug_info.mutable_status();
+ absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ debug_info.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ debug_info.mutable_status()->set_message(
+ "IcingSearchEngine has not been initialized!");
+ return debug_info;
}
- Crc32 schema_store_checksum = std::move(checksum_or).ValueOrDie();
+ // Index
+ *debug_info.mutable_debug_info()->mutable_index_info() =
+ index_->GetDebugInfo(verbosity);
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
- // that can support error logging.
- checksum_or = document_store_->ComputeChecksum();
- if (!checksum_or.ok()) {
- ICING_LOG(ERROR) << checksum_or.status().error_message()
- << "Failed to compute checksum of DocumentStore";
- return checksum_or.status();
+ // TODO(b/259744228): add debug info for integer index
+
+ // Document Store
+ libtextclassifier3::StatusOr<DocumentDebugInfoProto> document_debug_info =
+ document_store_->GetDebugInfo(verbosity);
+ if (!document_debug_info.ok()) {
+ TransformStatus(document_debug_info.status(), result_status);
+ return debug_info;
}
- Crc32 document_store_checksum = std::move(checksum_or).ValueOrDie();
+ *debug_info.mutable_debug_info()->mutable_document_info() =
+ std::move(document_debug_info).ValueOrDie();
- Crc32 index_checksum = index_->ComputeChecksum();
+ // Schema Store
+ libtextclassifier3::StatusOr<SchemaDebugInfoProto> schema_debug_info =
+ schema_store_->GetDebugInfo();
+ if (!schema_debug_info.ok()) {
+ TransformStatus(schema_debug_info.status(), result_status);
+ return debug_info;
+ }
+ *debug_info.mutable_debug_info()->mutable_schema_info() =
+ std::move(schema_debug_info).ValueOrDie();
+
+ result_status->set_code(StatusProto::OK);
+ return debug_info;
+}
- total_checksum.Append(std::to_string(document_store_checksum.Get()));
- total_checksum.Append(std::to_string(schema_store_checksum.Get()));
- total_checksum.Append(std::to_string(index_checksum.Get()));
+libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
+ PersistType::Code persist_type) {
+ if (persist_type == PersistType::LITE) {
+ return document_store_->PersistToDisk(persist_type);
+ }
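+  // LITE persistence only flushes the document store; FULL also flushes the
+  // schema store and every index, and is what Optimize() performs before and
+  // after compaction.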
+ ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
+ ICING_RETURN_IF_ERROR(index_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(integer_index_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(qualified_id_join_index_->PersistToDisk());
- return total_checksum;
+ return libtextclassifier3::Status::OK;
}
-bool IcingSearchEngine::HeaderExists() {
- if (!filesystem_->FileExists(
- MakeHeaderFilename(options_.base_dir()).c_str())) {
- return false;
+SearchResultProto IcingSearchEngine::Search(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec) {
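+  // The read-only path acquires a shared lock so that concurrent queries can
+  // proceed in parallel; the exclusive path serializes the search with
+  // writers.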
+ if (search_spec.use_read_only_search()) {
+ return SearchLockedShared(search_spec, scoring_spec, result_spec);
+ } else {
+ return SearchLockedExclusive(search_spec, scoring_spec, result_spec);
}
+}
+
+SearchResultProto IcingSearchEngine::SearchLockedShared(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec) {
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
+
+ // Only acquire an overall read-lock for this implementation. Finer-grained
+ // locks are implemented around code paths that write changes to Icing's data
+ // members.
+ absl_ports::shared_lock l(&mutex_);
+ int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
- int64_t file_size =
- filesystem_->GetFileSize(MakeHeaderFilename(options_.base_dir()).c_str());
+ SearchResultProto result_proto =
+ InternalSearch(search_spec, scoring_spec, result_spec);
- // If it's been truncated to size 0 before, we consider it to be a new file
- return file_size != 0 && file_size != Filesystem::kBadFileSize;
+ result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
+ lock_acquisition_latency);
+ result_proto.mutable_query_stats()->set_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ return result_proto;
}
-libtextclassifier3::Status IcingSearchEngine::UpdateHeader(
- const Crc32& checksum) {
- // Write the header
- IcingSearchEngine::Header header;
- header.magic = IcingSearchEngine::Header::kMagic;
- header.checksum = checksum.Get();
+SearchResultProto IcingSearchEngine::SearchLockedExclusive(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec) {
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
- // This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(options_.base_dir()).c_str(),
- &header, sizeof(header))) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to write IcingSearchEngine header: ",
- MakeHeaderFilename(options_.base_dir())));
- }
- return libtextclassifier3::Status::OK;
+ // Acquire the overall write-lock for this locked implementation.
+ absl_ports::unique_lock l(&mutex_);
+ int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
+
+ SearchResultProto result_proto =
+ InternalSearch(search_spec, scoring_spec, result_spec);
+
+ result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
+ lock_acquisition_latency);
+ result_proto.mutable_query_stats()->set_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ return result_proto;
}
-SearchResultProto IcingSearchEngine::Search(
+SearchResultProto IcingSearchEngine::InternalSearch(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
const ResultSpecProto& result_spec) {
SearchResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
- // TODO(b/146008613) Explore ideas to make this function read-only.
- absl_ports::unique_lock l(&mutex_);
+
+ QueryStatsProto* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_is_first_page(true);
+ query_stats->set_requested_page_size(result_spec.num_per_page());
+
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ query_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ query_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+ query_stats->set_query_length(search_spec.query().length());
+ query_stats->set_ranking_strategy(scoring_spec.rank_by());
+
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
return result_proto;
}
- libtextclassifier3::Status status = ValidateResultSpec(result_spec);
+ libtextclassifier3::Status status =
+ ValidateResultSpec(document_store_.get(), result_spec);
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
@@ -948,153 +1992,337 @@ SearchResultProto IcingSearchEngine::Search(
return result_proto;
}
- // Gets unordered results from query processor
- auto query_processor_or = QueryProcessor::Create(
- index_.get(), language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get(), clock_.get());
- if (!query_processor_or.ok()) {
- TransformStatus(query_processor_or.status(), result_status);
- return result_proto;
- }
- std::unique_ptr<QueryProcessor> query_processor =
- std::move(query_processor_or).ValueOrDie();
-
- auto query_results_or = query_processor->ParseSearch(search_spec);
- if (!query_results_or.ok()) {
- TransformStatus(query_results_or.status(), result_status);
- return result_proto;
- }
- QueryProcessor::QueryResults query_results =
- std::move(query_results_or).ValueOrDie();
+ const JoinSpecProto& join_spec = search_spec.join_spec();
+ std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
+ std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info;
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ if (!join_spec.parent_property_expression().empty() &&
+ !join_spec.child_property_expression().empty()) {
+ query_stats->set_is_join_query(true);
+ QueryStatsProto::SearchStats* child_search_stats =
+ query_stats->mutable_child_search_stats();
+
+ // Process child query
+ QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
+ join_spec.nested_spec().search_spec(),
+ join_spec.nested_spec().scoring_spec(),
+ join_spec.nested_spec().result_spec(),
+ /*join_children_fetcher=*/nullptr, current_time_ms, child_search_stats);
+ if (!nested_query_scoring_results.status.ok()) {
+ TransformStatus(nested_query_scoring_results.status, result_status);
+ return result_proto;
+ }
- // Scores but does not rank the results.
- libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
- scoring_processor_or =
- ScoringProcessor::Create(scoring_spec, document_store_.get());
- if (!scoring_processor_or.ok()) {
- TransformStatus(scoring_processor_or.status(), result_status);
+ JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get(),
+ current_time_ms);
+    // Build a JoinChildrenFetcher that groups child documents by their
+    // joinable values.
+ libtextclassifier3::StatusOr<JoinChildrenFetcher> join_children_fetcher_or =
+ join_processor.GetChildrenFetcher(
+ search_spec.join_spec(),
+ std::move(nested_query_scoring_results.scored_document_hits));
+ if (!join_children_fetcher_or.ok()) {
+ TransformStatus(join_children_fetcher_or.status(), result_status);
+ return result_proto;
+ }
+ join_children_fetcher = std::make_unique<JoinChildrenFetcher>(
+ std::move(join_children_fetcher_or).ValueOrDie());
+
+ // Assign child's ResultAdjustmentInfo.
+ child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
+ join_spec.nested_spec().search_spec(),
+ join_spec.nested_spec().scoring_spec(),
+ join_spec.nested_spec().result_spec(), schema_store_.get(),
+ std::move(nested_query_scoring_results.query_terms));
+ }
+
+ // Process parent query
+ QueryStatsProto::SearchStats* parent_search_stats =
+ query_stats->mutable_parent_search_stats();
+ QueryScoringResults query_scoring_results = ProcessQueryAndScore(
+ search_spec, scoring_spec, result_spec, join_children_fetcher.get(),
+ current_time_ms, parent_search_stats);
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ query_stats->set_num_terms(parent_search_stats->num_terms());
+ query_stats->set_parse_query_latency_ms(
+ parent_search_stats->parse_query_latency_ms());
+ query_stats->set_scoring_latency_ms(
+ parent_search_stats->scoring_latency_ms());
+ query_stats->set_num_documents_scored(
+ parent_search_stats->num_documents_scored());
+ if (!query_scoring_results.status.ok()) {
+ TransformStatus(query_scoring_results.status, result_status);
return result_proto;
}
- std::unique_ptr<ScoringProcessor> scoring_processor =
- std::move(scoring_processor_or).ValueOrDie();
- std::vector<ScoredDocumentHit> result_document_hits =
- scoring_processor->Score(std::move(query_results.root_iterator),
- performance_configuration_.num_to_score);
// Returns early for empty result
- if (result_document_hits.empty()) {
+ if (query_scoring_results.scored_document_hits.empty()) {
result_status->set_code(StatusProto::OK);
return result_proto;
}
- // Ranks and paginates results
- libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
- result_state_manager_.RankAndPaginate(ResultState(
- std::move(result_document_hits), std::move(query_results.query_terms),
- search_spec, scoring_spec, result_spec));
- if (!page_result_state_or.ok()) {
- TransformStatus(page_result_state_or.status(), result_status);
- return result_proto;
- }
- PageResultState page_result_state =
- std::move(page_result_state_or).ValueOrDie();
-
- // Retrieves the document protos and snippets if requested
+ // Construct parent's result adjustment info.
+ auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
+ search_spec, scoring_spec, result_spec, schema_store_.get(),
+ std::move(query_scoring_results.query_terms));
+
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker;
+ if (join_children_fetcher != nullptr) {
+ std::unique_ptr<Timer> join_timer = clock_->GetNewTimer();
+ // Join 2 scored document hits
+ JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
+ qualified_id_join_index_.get(),
+ current_time_ms);
+ libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
+ joined_result_document_hits_or = join_processor.Join(
+ join_spec, std::move(query_scoring_results.scored_document_hits),
+ *join_children_fetcher);
+ if (!joined_result_document_hits_or.ok()) {
+ TransformStatus(joined_result_document_hits_or.status(), result_status);
+ return result_proto;
+ }
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits =
+ std::move(joined_result_document_hits_or).ValueOrDie();
+
+ query_stats->set_join_latency_ms(join_timer->GetElapsedMilliseconds());
+
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
+ // Ranks results
+ ranker = std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::move(joined_result_document_hits),
+ /*is_descending=*/scoring_spec.order_by() ==
+ ScoringSpecProto::Order::DESC);
+ query_stats->set_ranking_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ } else {
+ // Non-join query
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
+ // Ranks results
+ ranker = std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(query_scoring_results.scored_document_hits),
+ /*is_descending=*/scoring_spec.order_by() ==
+ ScoringSpecProto::Order::DESC);
+ query_stats->set_ranking_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ }
+
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
+ // Caches the ranked results and retrieves the first page of document protos
+ // and snippets if requested.
auto result_retriever_or =
- ResultRetriever::Create(document_store_.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get());
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get());
if (!result_retriever_or.ok()) {
- result_state_manager_.InvalidateResultState(
- page_result_state.next_page_token);
TransformStatus(result_retriever_or.status(), result_status);
+ query_stats->set_document_retrieval_latency_ms(
+ component_timer->GetElapsedMilliseconds());
return result_proto;
}
- std::unique_ptr<ResultRetriever> result_retriever =
+ std::unique_ptr<ResultRetrieverV2> result_retriever =
std::move(result_retriever_or).ValueOrDie();
- libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
- results_or = result_retriever->RetrieveResults(page_result_state);
- if (!results_or.ok()) {
- result_state_manager_.InvalidateResultState(
- page_result_state.next_page_token);
- TransformStatus(results_or.status(), result_status);
+ libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
+ page_result_info_or = result_state_manager_->CacheAndRetrieveFirstPage(
+ std::move(ranker), std::move(parent_result_adjustment_info),
+ std::move(child_result_adjustment_info), result_spec,
+ *document_store_, *result_retriever, current_time_ms);
+ if (!page_result_info_or.ok()) {
+ TransformStatus(page_result_info_or.status(), result_status);
+ query_stats->set_document_retrieval_latency_ms(
+ component_timer->GetElapsedMilliseconds());
return result_proto;
}
- std::vector<SearchResultProto::ResultProto> results =
- std::move(results_or).ValueOrDie();
+ std::pair<uint64_t, PageResult> page_result_info =
+ std::move(page_result_info_or).ValueOrDie();
// Assembles the final search result proto
- result_proto.mutable_results()->Reserve(results.size());
- for (SearchResultProto::ResultProto& result : results) {
+ result_proto.mutable_results()->Reserve(
+ page_result_info.second.results.size());
+
+ int32_t child_count = 0;
+ for (SearchResultProto::ResultProto& result :
+ page_result_info.second.results) {
+ child_count += result.joined_results_size();
result_proto.mutable_results()->Add(std::move(result));
}
+
result_status->set_code(StatusProto::OK);
- if (page_result_state.next_page_token != kInvalidNextPageToken) {
- result_proto.set_next_page_token(page_result_state.next_page_token);
+ if (page_result_info.first != kInvalidNextPageToken) {
+ result_proto.set_next_page_token(page_result_info.first);
}
+
+ query_stats->set_document_retrieval_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ query_stats->set_num_results_returned_current_page(
+ result_proto.results_size());
+
+ query_stats->set_num_joined_results_returned_current_page(child_count);
+
+ query_stats->set_num_results_with_snippets(
+ page_result_info.second.num_results_with_snippets);
return result_proto;
}
+IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats) {
+ search_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ search_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+ search_stats->set_query_length(search_spec.query().length());
+ search_stats->set_ranking_strategy(scoring_spec.rank_by());
+
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
+
+ // Gets unordered results from query processor
+ auto query_processor_or = QueryProcessor::Create(
+ index_.get(), integer_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get());
+ if (!query_processor_or.ok()) {
+ search_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ return QueryScoringResults(std::move(query_processor_or).status(),
+ /*query_terms_in=*/{},
+ /*scored_document_hits_in=*/{});
+ }
+ std::unique_ptr<QueryProcessor> query_processor =
+ std::move(query_processor_or).ValueOrDie();
+
+ auto ranking_strategy_or = GetRankingStrategyFromScoringSpec(scoring_spec);
+ libtextclassifier3::StatusOr<QueryResults> query_results_or;
+ if (ranking_strategy_or.ok()) {
+ query_results_or = query_processor->ParseSearch(
+ search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms);
+ } else {
+ query_results_or = ranking_strategy_or.status();
+ }
+ search_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ if (!query_results_or.ok()) {
+ return QueryScoringResults(std::move(query_results_or).status(),
+ /*query_terms_in=*/{},
+ /*scored_document_hits_in=*/{});
+ }
+ QueryResults query_results = std::move(query_results_or).ValueOrDie();
+
+ // Set SearchStats related to QueryResults.
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ search_stats->set_num_terms(term_count);
+
+ if (query_results.features_in_use.count(kNumericSearchFeature)) {
+ search_stats->set_is_numeric_query(true);
+ }
+
+ component_timer = clock_->GetNewTimer();
+ // Scores but does not rank the results.
+ libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
+ scoring_processor_or = ScoringProcessor::Create(
+ scoring_spec, document_store_.get(), schema_store_.get(),
+ current_time_ms, join_children_fetcher);
+ if (!scoring_processor_or.ok()) {
+ return QueryScoringResults(std::move(scoring_processor_or).status(),
+ std::move(query_results.query_terms),
+ /*scored_document_hits_in=*/{});
+ }
+ std::unique_ptr<ScoringProcessor> scoring_processor =
+ std::move(scoring_processor_or).ValueOrDie();
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ scoring_processor->Score(
+ std::move(query_results.root_iterator), result_spec.num_to_score(),
+ &query_results.query_term_iterators, search_stats);
+ search_stats->set_scoring_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+
+ return QueryScoringResults(libtextclassifier3::Status::OK,
+ std::move(query_results.query_terms),
+ std::move(scored_document_hits));
+}
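+
+// A minimal usage sketch of the helper above, assuming the caller already
+// holds the shared lock on mutex_ (Search is the real caller):
+//
+//   QueryScoringResults results = ProcessQueryAndScore(
+//       search_spec, scoring_spec, result_spec,
+//       /*join_children_fetcher=*/nullptr, current_time_ms, search_stats);
+//   if (results.status.ok()) {
+//     // results.scored_document_hits is scored but unranked; rank it and
+//     // hand it to result_state_manager_->CacheAndRetrieveFirstPage(...).
+//   }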
+
SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
SearchResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
+ QueryStatsProto* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_is_first_page(false);
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
// ResultStateManager has its own writer lock, so here we only need a reader
// lock for other components.
absl_ports::shared_lock l(&mutex_);
+ query_stats->set_lock_acquisition_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
return result_proto;
}
- libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
- result_state_manager_.GetNextPage(next_page_token);
-
- if (!page_result_state_or.ok()) {
- if (absl_ports::IsNotFound(page_result_state_or.status())) {
- // NOT_FOUND means an empty result.
- result_status->set_code(StatusProto::OK);
- } else {
- // Real error, pass up.
- TransformStatus(page_result_state_or.status(), result_status);
- }
- return result_proto;
- }
-
- PageResultState page_result_state =
- std::move(page_result_state_or).ValueOrDie();
-
- // Retrieves the document protos.
auto result_retriever_or =
- ResultRetriever::Create(document_store_.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get());
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get());
if (!result_retriever_or.ok()) {
TransformStatus(result_retriever_or.status(), result_status);
return result_proto;
}
- std::unique_ptr<ResultRetriever> result_retriever =
+ std::unique_ptr<ResultRetrieverV2> result_retriever =
std::move(result_retriever_or).ValueOrDie();
- libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
- results_or = result_retriever->RetrieveResults(page_result_state);
- if (!results_or.ok()) {
- TransformStatus(results_or.status(), result_status);
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
+ page_result_info_or = result_state_manager_->GetNextPage(
+ next_page_token, *result_retriever, current_time_ms);
+ if (!page_result_info_or.ok()) {
+ if (absl_ports::IsNotFound(page_result_info_or.status())) {
+ // NOT_FOUND means an empty result.
+ result_status->set_code(StatusProto::OK);
+ } else {
+ // Real error, pass up.
+ TransformStatus(page_result_info_or.status(), result_status);
+ }
return result_proto;
}
- std::vector<SearchResultProto::ResultProto> results =
- std::move(results_or).ValueOrDie();
+
+ std::pair<uint64_t, PageResult> page_result_info =
+ std::move(page_result_info_or).ValueOrDie();
+ query_stats->set_requested_page_size(
+ page_result_info.second.requested_page_size);
// Assembles the final search result proto
- result_proto.mutable_results()->Reserve(results.size());
- for (SearchResultProto::ResultProto& result : results) {
+ result_proto.mutable_results()->Reserve(
+ page_result_info.second.results.size());
+
+ int32_t child_count = 0;
+ for (SearchResultProto::ResultProto& result :
+ page_result_info.second.results) {
+ child_count += result.joined_results_size();
result_proto.mutable_results()->Add(std::move(result));
}
result_status->set_code(StatusProto::OK);
- if (result_proto.results_size() > 0) {
- result_proto.set_next_page_token(next_page_token);
- }
+ if (page_result_info.first != kInvalidNextPageToken) {
+ result_proto.set_next_page_token(page_result_info.first);
+ }
+
+ // The only thing that we're doing is document retrieval. So document
+ // retrieval latency and overall latency are the same and can use the same
+ // timer.
+ query_stats->set_document_retrieval_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
+ query_stats->set_num_results_returned_current_page(
+ result_proto.results_size());
+ query_stats->set_num_results_with_snippets(
+ page_result_info.second.num_results_with_snippets);
+ query_stats->set_num_joined_results_returned_current_page(child_count);
+
return result_proto;
}
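
// A hypothetical paging loop over this API; a zero/invalid next-page token
// marks the end of results, per the comments above:
//
//   SearchResultProto page = icing.Search(search_spec, scoring_spec,
//                                         result_spec);
//   while (page.status().code() == StatusProto::OK &&
//          page.next_page_token() != kInvalidNextPageToken) {
//     page = icing.GetNextPage(page.next_page_token());
//   }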
@@ -1104,10 +2332,11 @@ void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) {
ICING_LOG(ERROR) << "IcingSearchEngine has not been initialized!";
return;
}
- result_state_manager_.InvalidateResultState(next_page_token);
+ result_state_manager_->InvalidateResultState(next_page_token);
}
-libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
+libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Gets the current directory path and an empty tmp directory path for
// document store optimization.
const std::string current_document_dir =
@@ -1123,17 +2352,21 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
}
// Copies valid document data to tmp directory
- auto optimize_status = document_store_->OptimizeInto(temporary_document_dir);
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ optimize_result_or = document_store_->OptimizeInto(
+ temporary_document_dir, language_segmenter_.get(), optimize_stats);
// Handles error if any
- if (!optimize_status.ok()) {
+ if (!optimize_result_or.ok()) {
filesystem_->DeleteDirectoryRecursively(temporary_document_dir.c_str());
return absl_ports::Annotate(
absl_ports::AbortedError("Failed to optimize document store"),
- optimize_status.error_message());
+ optimize_result_or.status().error_message());
}
- // Resets before swapping
+ // result_state_manager_ depends on document_store_. So we need to reset it at
+ // the same time that we reset the document_store_.
+ result_state_manager_.reset();
document_store_.reset();
// When swapping files, always put the current working directory at the
@@ -1146,24 +2379,35 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
// Ensures that current directory is still present.
if (!filesystem_->CreateDirectoryRecursively(
current_document_dir.c_str())) {
+ // Can't even create the old directory. Mark as uninitialized and return
+ // INTERNAL.
+ initialized_ = false;
return absl_ports::InternalError(
"Failed to create file directory for document store");
}
// Tries to rebuild document store if swapping fails, to avoid leaving the
// system in the broken state for future operations.
- auto document_store_or =
- DocumentStore::Create(filesystem_.get(), current_document_dir,
- clock_.get(), schema_store_.get());
+ auto create_result_or = DocumentStore::Create(
+ filesystem_.get(), current_document_dir, clock_.get(),
+ schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), /*initialize_stats=*/nullptr);
// TODO(b/144458732): Implement a more robust version of
// TC_ASSIGN_OR_RETURN that can support error logging.
- if (!document_store_or.ok()) {
+ if (!create_result_or.ok()) {
+ // Unable to create DocumentStore from the old file. Mark as uninitialized
+ // and return INTERNAL.
+ initialized_ = false;
ICING_LOG(ERROR) << "Failed to create document store instance";
return absl_ports::Annotate(
absl_ports::InternalError("Failed to create document store instance"),
- document_store_or.status().error_message());
+ create_result_or.status().error_message());
}
- document_store_ = std::move(document_store_or).ValueOrDie();
+ document_store_ = std::move(create_result_or.ValueOrDie().document_store);
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
// Potential data loss
// TODO(b/147373249): Find a way to detect true data loss error
@@ -1172,13 +2416,25 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
}
// Recreates the doc store instance
- ICING_ASSIGN_OR_RETURN(
- document_store_,
- DocumentStore::Create(filesystem_.get(), current_document_dir,
- clock_.get(), schema_store_.get()),
- absl_ports::InternalError(
- "Document store has been optimized, but a valid document store "
- "instance can't be created"));
+ auto create_result_or = DocumentStore::Create(
+ filesystem_.get(), current_document_dir, clock_.get(),
+ schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), /*initialize_stats=*/nullptr);
+ if (!create_result_or.ok()) {
+ // Unable to create DocumentStore from the new file. Mark as uninitialized
+ // and return INTERNAL.
+ initialized_ = false;
+ return absl_ports::InternalError(
+ "Document store has been optimized, but a valid document store "
+ "instance can't be created");
+ }
+ DocumentStore::CreateResult create_result =
+ std::move(create_result_or).ValueOrDie();
+ document_store_ = std::move(create_result.document_store);
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
// Deletes tmp directory
if (!filesystem_->DeleteDirectoryRecursively(
@@ -1187,26 +2443,70 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
"delete temporary file directory";
}
- return libtextclassifier3::Status::OK;
+ // Since we created the new (optimized) document store with a correct
+ // PersistToDisk call, we shouldn't see data loss or need to regenerate
+ // derived files. Therefore, if we really encounter any of these situations,
+ // return DataLossError to let the caller rebuild the index.
+ if (create_result.data_loss != DataLoss::NONE ||
+ create_result.derived_files_regenerated) {
+ return absl_ports::DataLossError(
+ "Unexpected data loss or derived files regenerated for new document "
+ "store");
+ }
+
+ return optimize_result_or;
}
-libtextclassifier3::Status IcingSearchEngine::RestoreIndex() {
+IcingSearchEngine::IndexRestorationResult
+IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
-
- if (last_stored_document_id == kInvalidDocumentId) {
- // Nothing to index
- return libtextclassifier3::Status::OK;
+ if (last_stored_document_id == index_->last_added_document_id() &&
+ last_stored_document_id == integer_index_->last_added_document_id() &&
+ last_stored_document_id ==
+ qualified_id_join_index_->last_added_document_id()) {
+ // No need to recover.
+ return {libtextclassifier3::Status::OK, false, false, false};
}
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(schema_store_.get(), language_segmenter_.get(),
- normalizer_.get(), index_.get(),
- CreateIndexProcessorOptions(options_)));
-
- for (DocumentId document_id = kMinDocumentId;
- document_id <= last_stored_document_id; document_id++) {
+ if (last_stored_document_id == kInvalidDocumentId) {
+ // Document store is empty but index is not. Clear the index.
+ return {ClearAllIndices(), false, false, false};
+ }
+
+ // Truncate indices first.
+ auto truncate_result_or = TruncateIndicesTo(last_stored_document_id);
+ if (!truncate_result_or.ok()) {
+ return {std::move(truncate_result_or).status(), false, false, false};
+ }
+ TruncateIndexResult truncate_result =
+ std::move(truncate_result_or).ValueOrDie();
+
+ if (truncate_result.first_document_to_reindex > last_stored_document_id) {
+ // Nothing to restore. Just return.
+ return {libtextclassifier3::Status::OK, false, false, false};
+ }
+
+ auto data_indexing_handlers_or = CreateDataIndexingHandlers();
+ if (!data_indexing_handlers_or.ok()) {
+ return {data_indexing_handlers_or.status(),
+ truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
+ }
+ // By using recovery_mode for IndexProcessor, we're able to replay documents
+ // from a smaller document id, and it will skip documents that have already
+ // been indexed.
+ IndexProcessor index_processor(
+ std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get(),
+ /*recovery_mode=*/true);
+
+ ICING_VLOG(1) << "Restoring index by replaying documents from document id "
+ << truncate_result.first_document_to_reindex
+ << " to document id " << last_stored_document_id;
+ libtextclassifier3::Status overall_status;
+ for (DocumentId document_id = truncate_result.first_document_to_reindex;
+ document_id <= last_stored_document_id; ++document_id) {
libtextclassifier3::StatusOr<DocumentProto> document_or =
document_store_->Get(document_id);
@@ -1217,15 +2517,45 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndex() {
continue;
} else {
// Returns other errors
- return document_or.status();
+ return {document_or.status(), truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
}
+ DocumentProto document(std::move(document_or).ValueOrDie());
+
+ libtextclassifier3::StatusOr<TokenizedDocument> tokenized_document_or =
+ TokenizedDocument::Create(schema_store_.get(),
+ language_segmenter_.get(),
+ std::move(document));
+ if (!tokenized_document_or.ok()) {
+ return {tokenized_document_or.status(),
+ truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
+ }
+ TokenizedDocument tokenized_document(
+ std::move(tokenized_document_or).ValueOrDie());
- ICING_RETURN_IF_ERROR(
- index_processor->IndexDocument(document_or.ValueOrDie(), document_id));
+ libtextclassifier3::Status status =
+ index_processor.IndexDocument(tokenized_document, document_id);
+ if (!status.ok()) {
+ if (!absl_ports::IsDataLoss(status)) {
+ // Real error. Stop recovering and pass it up.
+ return {status, truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
+ }
+ // Just a data loss. Keep trying to add the remaining docs, but report the
+ // data loss when we're done.
+ overall_status = status;
+ }
}
- return libtextclassifier3::Status::OK;
+ return {overall_status, truncate_result.index_needed_restoration,
+ truncate_result.integer_index_needed_restoration,
+ truncate_result.qualified_id_join_index_needed_restoration};
}
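
// A worked example of the replay above, with illustrative values: if the
// document store's last added document id is 10 while the term index stops at
// 7 and the integer index at 9, TruncateIndicesTo reports
// first_document_to_reindex == 8, and documents 8 through 10 are re-fed
// through the IndexProcessor in recovery mode, which skips hits that already
// exist.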
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
@@ -1251,30 +2581,199 @@ libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
return document_store_->last_added_document_id() != kInvalidDocumentId;
}
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
+IcingSearchEngine::CreateDataIndexingHandlers() {
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+
+ // Term index handler
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ clock_.get(), normalizer_.get(), index_.get(),
+ options_.build_property_existence_metadata_hits()));
+ handlers.push_back(std::move(term_indexing_handler));
+
+ // Integer index handler
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ clock_.get(), integer_index_.get()));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+
+ // Qualified id join index handler
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ clock_.get(), document_store_.get(), qualified_id_join_index_.get()));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+
+ return handlers;
+}
+
+libtextclassifier3::StatusOr<IcingSearchEngine::TruncateIndexResult>
+IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
+ // Attempt to truncate term index.
+ // TruncateTo ensures that the index does not hold any data that is not
+ // present in the ground truth. If the document store lost some documents,
+ // TruncateTo will ensure that the index does not contain any hits from those
+ // lost documents. If the index does not contain any hits for documents with
+ // document id greater than last_stored_document_id, then TruncateTo will have
+ // no effect.
+ ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
+
+ // Get last indexed document id for term index after truncating.
+ DocumentId term_index_last_added_document_id =
+ index_->last_added_document_id();
+ DocumentId first_document_to_reindex =
+ (term_index_last_added_document_id != kInvalidDocumentId)
+ ? term_index_last_added_document_id + 1
+ : kMinDocumentId;
+ bool index_needed_restoration =
+ (last_stored_document_id != term_index_last_added_document_id);
+
+ // Attempt to truncate integer index.
+ bool integer_index_needed_restoration = false;
+ DocumentId integer_index_last_added_document_id =
+ integer_index_->last_added_document_id();
+ if (integer_index_last_added_document_id == kInvalidDocumentId ||
+ last_stored_document_id > integer_index_last_added_document_id) {
+ // If last_stored_document_id is greater than
+ // integer_index_last_added_document_id, then we only have to replay docs
+ // starting from integer_index_last_added_document_id + 1. Also use std::min
+ // since we might need to replay even smaller doc ids for term index.
+ integer_index_needed_restoration = true;
+ if (integer_index_last_added_document_id != kInvalidDocumentId) {
+ first_document_to_reindex = std::min(
+ first_document_to_reindex, integer_index_last_added_document_id + 1);
+ } else {
+ first_document_to_reindex = kMinDocumentId;
+ }
+ } else if (last_stored_document_id < integer_index_last_added_document_id) {
+ // Clear the entire integer index if last_stored_document_id is smaller than
+ // integer_index_last_added_document_id, because there is no way to remove
+ // data with doc_id > last_stored_document_id from integer index and we have
+ // to rebuild.
+ ICING_RETURN_IF_ERROR(integer_index_->Clear());
+
+ // Since the entire integer index is discarded, we start to rebuild it by
+ // setting first_document_to_reindex to kMinDocumentId.
+ integer_index_needed_restoration = true;
+ first_document_to_reindex = kMinDocumentId;
+ }
+
+ // Attempt to truncate qualified id join index
+ bool qualified_id_join_index_needed_restoration = false;
+ DocumentId qualified_id_join_index_last_added_document_id =
+ qualified_id_join_index_->last_added_document_id();
+ if (qualified_id_join_index_last_added_document_id == kInvalidDocumentId ||
+ last_stored_document_id >
+ qualified_id_join_index_last_added_document_id) {
+ // If last_stored_document_id is greater than
+ // qualified_id_join_index_last_added_document_id, then we only have to
+ // replay docs starting from (qualified_id_join_index_last_added_document_id
+ // + 1). Also use std::min since we might need to replay even smaller doc
+ // ids for other components.
+ qualified_id_join_index_needed_restoration = true;
+ if (qualified_id_join_index_last_added_document_id != kInvalidDocumentId) {
+ first_document_to_reindex =
+ std::min(first_document_to_reindex,
+ qualified_id_join_index_last_added_document_id + 1);
+ } else {
+ first_document_to_reindex = kMinDocumentId;
+ }
+ } else if (last_stored_document_id <
+ qualified_id_join_index_last_added_document_id) {
+ // Clear the entire qualified id join index if last_stored_document_id is
+ // smaller than qualified_id_join_index_last_added_document_id, because
+ // there is no way to remove data with doc_id > last_stored_document_id from
+ // join index efficiently and we have to rebuild.
+ ICING_RETURN_IF_ERROR(qualified_id_join_index_->Clear());
+
+ // Since the entire qualified id join index is discarded, we start to
+ // rebuild it by setting first_document_to_reindex to kMinDocumentId.
+ qualified_id_join_index_needed_restoration = true;
+ first_document_to_reindex = kMinDocumentId;
+ }
+
+ return TruncateIndexResult(first_document_to_reindex,
+ index_needed_restoration,
+ integer_index_needed_restoration,
+ qualified_id_join_index_needed_restoration);
+}
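+
+// A worked example of the truncation above, with illustrative values: given
+// last_stored_document_id == 20, a term index whose last added id is 18, an
+// integer index at 25, and a join index at 20:
+//   - term index: 18 < 20, so nothing is truncated; first_document_to_reindex
+//     becomes 19 and index_needed_restoration is true;
+//   - integer index: 25 > 20 and cannot be truncated in place, so it is
+//     cleared and first_document_to_reindex falls back to kMinDocumentId;
+//   - join index: already at 20, so it needs no restoration.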
+
+libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
+ if (schema_store_ != nullptr || document_store_ != nullptr ||
+ index_ != nullptr || integer_index_ != nullptr ||
+ qualified_id_join_index_ != nullptr) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot discard derived files while having valid instances");
+ }
+
+ // Schema store
+ ICING_RETURN_IF_ERROR(
+ SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir()));
+
+ // Document store
+ ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
+ filesystem_.get(), options_.base_dir()));
+
+ // Term index
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard index");
+ }
+
+ // Integer index
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard integer index");
+ }
+
+ // Qualified id join index
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError(
+ "Failed to discard qualified id join index");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() {
+ ICING_RETURN_IF_ERROR(index_->Reset());
+ ICING_RETURN_IF_ERROR(integer_index_->Clear());
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearJoinIndices() {
+ return qualified_id_join_index_->Clear();
+}
+
+libtextclassifier3::Status IcingSearchEngine::ClearAllIndices() {
+ ICING_RETURN_IF_ERROR(ClearSearchIndices());
+ ICING_RETURN_IF_ERROR(ClearJoinIndices());
+ return libtextclassifier3::Status::OK;
+}
+
ResetResultProto IcingSearchEngine::Reset() {
+ absl_ports::unique_lock l(&mutex_);
+ return ResetInternal();
+}
+
+ResetResultProto IcingSearchEngine::ResetInternal() {
ICING_VLOG(1) << "Resetting IcingSearchEngine";
ResetResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
- int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
-
+ initialized_ = false;
+ ResetMembers();
if (!filesystem_->DeleteDirectoryRecursively(options_.base_dir().c_str())) {
- int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
- if (after_size != before_size) {
- // Our filesystem doesn't atomically delete. If we have a discrepancy in
- // size, then that means we may have deleted some files, but not others.
- // So our data is in an invalid state now.
- result_status->set_code(StatusProto::INTERNAL);
- return result_proto;
- }
-
- result_status->set_code(StatusProto::ABORTED);
+ result_status->set_code(StatusProto::INTERNAL);
return result_proto;
}
- absl_ports::unique_lock l(&mutex_);
- initialized_ = false;
if (InternalInitialize().status().code() != StatusProto::OK) {
// We shouldn't hit the following Initialize errors:
// NOT_FOUND: all data was cleared, we aren't expecting anything
@@ -1295,5 +2794,54 @@ ResetResultProto IcingSearchEngine::Reset() {
return result_proto;
}
+SuggestionResponse IcingSearchEngine::SearchSuggestions(
+ const SuggestionSpecProto& suggestion_spec) {
+ // TODO(b/146008613) Explore ideas to make this function read-only.
+ absl_ports::unique_lock l(&mutex_);
+ SuggestionResponse response;
+ StatusProto* response_status = response.mutable_status();
+ if (!initialized_) {
+ response_status->set_code(StatusProto::FAILED_PRECONDITION);
+ response_status->set_message("IcingSearchEngine has not been initialized!");
+ return response;
+ }
+
+ libtextclassifier3::Status status =
+ ValidateSuggestionSpec(suggestion_spec, performance_configuration_);
+ if (!status.ok()) {
+ TransformStatus(status, response_status);
+ return response;
+ }
+
+ // Create the suggestion processor.
+ auto suggestion_processor_or = SuggestionProcessor::Create(
+ index_.get(), integer_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get());
+ if (!suggestion_processor_or.ok()) {
+ TransformStatus(suggestion_processor_or.status(), response_status);
+ return response;
+ }
+ std::unique_ptr<SuggestionProcessor> suggestion_processor =
+ std::move(suggestion_processor_or).ValueOrDie();
+
+ // Run suggestion based on given SuggestionSpec.
+ int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
+ libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or =
+ suggestion_processor->QuerySuggestions(suggestion_spec, current_time_ms);
+ if (!terms_or.ok()) {
+ TransformStatus(terms_or.status(), response_status);
+ return response;
+ }
+
+ // Converts vector<TermMetadata> into the final SuggestionResponse proto.
+ for (TermMetadata& term : terms_or.ValueOrDie()) {
+ SuggestionResponse::Suggestion suggestion;
+ suggestion.set_query(std::move(term.content));
+ response.mutable_suggestions()->Add(std::move(suggestion));
+ }
+ response_status->set_code(StatusProto::OK);
+ return response;
+}
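+
+// A hedged caller sketch for the API above; the SuggestionSpecProto field
+// names used here (prefix, num_to_return) are assumptions:
+//
+//   SuggestionSpecProto spec;
+//   spec.set_prefix("fo");
+//   spec.set_num_to_return(10);
+//   SuggestionResponse response = icing.SearchSuggestions(spec);
+//   for (const SuggestionResponse::Suggestion& s : response.suggestions()) {
+//     // s.query() holds one suggested query string, e.g. "foo".
+//   }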
+
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 6ae76d7..d316350 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -19,26 +19,38 @@
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
+#include <vector>
-#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/thread_annotations.h"
#include "icing/file/filesystem.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/performance-configuration.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/optimize.pb.h"
#include "icing/proto/persist.pb.h"
#include "icing/proto/reset.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-terms.h"
#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
+#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
@@ -51,16 +63,6 @@ namespace lib {
// TODO(cassiewang) Top-level comments and links to design-doc.
class IcingSearchEngine {
public:
- struct Header {
- static constexpr int32_t kMagic = 0x6e650d0a;
-
- // Holds the magic as a quick sanity check against file corruption.
- int32_t magic;
-
- // Checksum of the IcingSearchEngine's sub-component's checksums.
- uint32_t checksum;
- };
-
// Note: It is only required to provide a pointer to a valid instance of
// JniCache if this instance needs to perform reverse-jni calls. Users on
// Linux and iOS should always provide a nullptr.
@@ -128,12 +130,18 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // ALREADY_EXISTS if 'new_schema' contains multiple definitions of the same
+ // type or contains a type that has multiple properties with the same
+ // name.
// INVALID_ARGUMENT if 'new_schema' is invalid
// FAILED_PRECONDITION if 'new_schema' is incompatible, or IcingSearchEngine
// has not been initialized yet.
// INTERNAL_ERROR if Icing failed to store the new schema or upgrade
// existing data based on the new schema. Using Icing beyond this error is
// undefined and may cause crashes.
+ // DATA_LOSS_ERROR if 'new_schema' requires the index to be rebuilt and an
+ // IO error leads to some documents being excluded from the index. These
+ // documents will still be retrievable via Get, but won't match queries.
//
// TODO(cassiewang) Figure out, document (and maybe even enforce) the best
// way ordering of calls between Initialize() and SetSchema(), both when
@@ -180,10 +188,14 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // OUT_OF_SPACE if exceeds maximum number of allowed documents
// FAILED_PRECONDITION if a schema has not been set yet, IcingSearchEngine
// has not been initialized yet.
// NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches
// the document's schema
+ // DATA_LOSS if an IO error occurs while merging document into the index and
+ // the index is lost. These documents will still be retrievable via Get,
+ // but won't match queries.
// INTERNAL_ERROR on IO error
PutResultProto Put(DocumentProto&& document) ICING_LOCKS_EXCLUDED(mutex_);
@@ -203,7 +215,17 @@ class IcingSearchEngine {
// NOT_FOUND if the key doesn't exist or doc has been deleted
// FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
// INTERNAL_ERROR on IO error
- GetResultProto Get(std::string_view name_space, std::string_view uri);
+ GetResultProto Get(std::string_view name_space, std::string_view uri,
+ const GetResultSpecProto& result_spec);
+
+ // Reports usage. The corresponding usage scores of the specified document in
+ // the report will be updated.
+ //
+ // Returns:
+ // OK on success
+ //   NOT_FOUND if the [namespace + uri] key in the report doesn't exist
+ // INTERNAL_ERROR on I/O errors.
+ ReportUsageResultProto ReportUsage(const UsageReport& usage_report);
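+
+ // A sketch of a hypothetical report; the UsageReport field names below are
+ // assumptions:
+ //
+ //   UsageReport report;
+ //   report.set_document_namespace("email");
+ //   report.set_document_uri("uri1");
+ //   report.set_usage_type(UsageReport::USAGE_TYPE1);
+ //   ReportUsageResultProto result = icing.ReportUsage(report);
+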
// Returns all the namespaces that have at least one valid document in it.
//
@@ -256,10 +278,26 @@ class IcingSearchEngine {
DeleteBySchemaTypeResultProto DeleteBySchemaType(std::string_view schema_type)
ICING_LOCKS_EXCLUDED(mutex_);
+ // Deletes all Documents that match the query specified in search_spec.
+ // Delete changes are automatically applied to disk; callers can also call
+ // PersistToDisk() to flush changes immediately.
+ //
+ // NOTE: Space is not reclaimed for deleted documents until Optimize() is
+ // called.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND if the query doesn't match any documents
+ // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
+ // INTERNAL_ERROR on IO error
+ DeleteByQueryResultProto DeleteByQuery(
+ const SearchSpecProto& search_spec,
+ bool return_deleted_document_info = false) ICING_LOCKS_EXCLUDED(mutex_);
+
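+ // For example, a hypothetical caller deleting all documents that match a
+ // term query (the query string is illustrative):
+ //
+ //   SearchSpecProto spec;
+ //   spec.set_query("spam");
+ //   spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ //   DeleteByQueryResultProto result = icing.DeleteByQuery(spec);
+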
// Retrieves, scores, ranks, and returns the results according to the specs.
// Results can be empty. If there are multiple pages of results,
- // SearchResultProto.next_page_token will be populated and that can be used to
- // fetch more pages via GetNextPage() method. Clients should call
+ // SearchResultProto.next_page_token will be set to a non-zero token and can
+ // be used to fetch more pages via GetNextPage() method. Clients should call
// InvalidateNextPageToken() after they get the pages they need to release
// result cache in memory. Please refer to each proto file for spec
// definitions.
@@ -275,8 +313,24 @@ class IcingSearchEngine {
const ResultSpecProto& result_spec)
ICING_LOCKS_EXCLUDED(mutex_);
+ // Retrieves, scores, ranks and returns the suggested query string according
+ // to the specs. Results can be empty.
+ //
+ // Returns a SuggestionResponse with status:
+ // OK with results on success
+ // INVALID_ARGUMENT if any of specs is invalid
+ // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
+ // INTERNAL_ERROR on any other errors
+ SuggestionResponse SearchSuggestions(
+ const SuggestionSpecProto& suggestion_spec) ICING_LOCKS_EXCLUDED(mutex_);
+
// Fetches the next page of results of a previously executed query. Results
- // can be empty if next-page token is invalid or all pages have been returned.
+ // can be empty if next-page token is invalid. Invalid next page tokens are
+ // tokens that are either zero or were previously passed to
+ // InvalidateNextPageToken. If there are pages of results remaining after the
+ // one retrieved by this call, SearchResultProto.next_page_token will be
+ // set to a non-zero token and can be used to fetch more pages via
+ // GetNextPage() method.
//
// Returns a SearchResultProto with status:
// OK with results on success
@@ -288,12 +342,26 @@ class IcingSearchEngine {
// Invalidates the next-page token so that no more results of the related
// query can be returned.
- void InvalidateNextPageToken(uint64_t next_page_token);
+ void InvalidateNextPageToken(uint64_t next_page_token)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Makes sure that every update/delete received till this point is flushed
// to disk. If the app crashes after a call to PersistToDisk(), Icing
// would be able to fully recover all data written up to this point.
//
+ // If persist_type is PersistType::LITE, then only the ground truth will be
+ // synced. This should be relatively lightweight to do (order of microseconds)
+ // and ensures that there will be no data loss. At worst, Icing may need to
+ // recover internal data structures by replaying the document log upon the
+ // next startup. Clients should call PersistToDisk(LITE) after each batch of
+ // mutations.
+ //
+ // If persist_type is PersistType::FULL, then all internal data structures in
+ // Icing will be synced. This is a heavier operation (order of milliseconds).
+ // It ensures that Icing will not need to recover internal data structures
+ // upon the next startup. Clients should call PersistToDisk(FULL) before their
+ // process dies.
+ //
// NOTE: It is not necessary to call PersistToDisk() to read back data
// that was recently written. All read APIs will include the most recent
// updates/deletes regardless of the data being flushed to disk.
@@ -302,7 +370,8 @@ class IcingSearchEngine {
// OK on success
// FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
// INTERNAL on I/O error
- PersistToDiskResultProto PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_);
+ PersistToDiskResultProto PersistToDisk(PersistType::Code persist_type)
+ ICING_LOCKS_EXCLUDED(mutex_);
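+
+ // For instance, following the LITE/FULL guidance above, a client might do:
+ //
+ //   icing.Put(std::move(document));
+ //   icing.PersistToDisk(PersistType::LITE);  // cheap, after each batch
+ //   ...
+ //   icing.PersistToDisk(PersistType::FULL);  // heavier, before process exit
+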
// Allows Icing to run tasks that are too expensive and/or unnecessary to be
// executed in real-time, but are useful to keep it fast and be
@@ -338,6 +407,16 @@ class IcingSearchEngine {
// INTERNAL_ERROR on IO error
GetOptimizeInfoResultProto GetOptimizeInfo() ICING_LOCKS_EXCLUDED(mutex_);
+ // Calculates the StorageInfo for Icing.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Get debug information for Icing.
+ DebugInfoResultProto GetDebugInfo(DebugInfoVerbosity::Code verbosity)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
// Clears all data from Icing and re-initializes. Clients DO NOT need to call
// Initialize again.
//
@@ -354,6 +433,7 @@ class IcingSearchEngine {
protected:
IcingSearchEngine(IcingSearchEngineOptions options,
std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
std::unique_ptr<Clock> clock,
std::unique_ptr<const JniCache> jni_cache = nullptr);
@@ -364,15 +444,19 @@ class IcingSearchEngine {
bool initialized_ ICING_GUARDED_BY(mutex_) = false;
// Abstraction for accessing time values.
- std::unique_ptr<Clock> clock_;
+ const std::unique_ptr<const Clock> clock_;
// Provides key thresholds that affects the running time and memory of major
// components in Icing search engine.
- PerformanceConfiguration performance_configuration_;
-
- // Used to manage pagination state of query results. A lock is not needed here
- // because ResultStateManager has its own reader-writer lock.
- ResultStateManager result_state_manager_;
+ const PerformanceConfiguration performance_configuration_;
+
+ // Used to manage pagination state of query results. Even though
+ // ResultStateManager has its own reader-writer lock, mutex_ must still be
+ // acquired first in order to adhere to the global lock ordering:
+ // 1. mutex_
+ // 2. result_state_manager_.lock_
+ std::unique_ptr<ResultStateManager> result_state_manager_
+ ICING_GUARDED_BY(mutex_);
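+
+ // For example, a reader honoring that ordering (as GetNextPage does):
+ //
+ //   absl_ports::shared_lock l(&mutex_);       // 1. acquire mutex_ first
+ //   result_state_manager_->GetNextPage(...);  // 2. then its internal lock
+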
// Used to provide reader and writer locks
absl_ports::shared_mutex mutex_;
@@ -388,18 +472,45 @@ class IcingSearchEngine {
std::unique_ptr<const Normalizer> normalizer_ ICING_GUARDED_BY(mutex_);
- // Storage for all hits of content from the document store.
+ // Storage for all hits of string contents from the document store.
std::unique_ptr<Index> index_ ICING_GUARDED_BY(mutex_);
+ // Storage for all hits of numeric contents from the document store.
+ std::unique_ptr<NumericIndex<int64_t>> integer_index_
+ ICING_GUARDED_BY(mutex_);
+
+ // Storage for all join qualified ids from the document store.
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_
+ ICING_GUARDED_BY(mutex_);
+
// Pointer to JNI class references
const std::unique_ptr<const JniCache> jni_cache_;
+ // Resets all members that are created during Initialize.
+ void ResetMembers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Resets all members that are created during Initialize, deletes all
+ // underlying files and initializes a fresh index.
+ ResetResultProto ResetInternal() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Checks for the existence of the init marker file. If the failed init count
+ // exceeds kMaxUnsuccessfulInitAttempts, all data is deleted and the index is
+ // initialized from scratch. The updated count (the original failed init
+ // count + 1) is written to the marker file.
+ //
+ // RETURNS
+ // OK on success
+ // INTERNAL if an IO error occurs while trying to update the marker file.
+ libtextclassifier3::Status CheckInitMarkerFile(
+ InitializeStatsProto* initialize_stats)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Helper method to do the actual work to persist data to disk. We need this
// separate method so that other public methods don't need to call
// PersistToDisk(). Public methods calling each other may cause deadlock
// issues.
- libtextclassifier3::Status InternalPersistToDisk()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ libtextclassifier3::Status InternalPersistToDisk(
+ PersistType::Code persist_type) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to the actual work to Initialize. We need this separate
// method so that other public methods don't need to call Initialize(). Public
@@ -411,49 +522,110 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// RESOURCE_EXHAUSTED if the index runs out of storage
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on any I/O errors
- libtextclassifier3::Status InitializeMembers()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Do any validation/setup required for the given IcingSearchEngineOptions
- //
- // Returns:
- // OK on success
- // INVALID_ARGUMENT if options has invalid values
- // INTERNAL on I/O error
- libtextclassifier3::Status InitializeOptions()
+ libtextclassifier3::Status InitializeMembers(
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a SchemaStore instance.
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeSchemaStore()
+ libtextclassifier3::Status InitializeSchemaStore(
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
// instance.
//
+ // See comments on DocumentStore::Create for explanation of
+ // force_recovery_and_revalidate_documents.
+ //
// Returns:
- // OK on success
+ //   On success, a boolean flag indicating whether derived files of the
+ //   document store have been regenerated. If true, any other components
+ //   depending on them should also be rebuilt.
+ // FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeDocumentStore()
+ libtextclassifier3::StatusOr<bool> InitializeDocumentStore(
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Do any initialization/recovery necessary to create a DocumentStore
- // instance.
+ // Do any initialization/recovery necessary to create term index, integer
+ // index, and qualified id join index instances.
+ //
+ // If document_store_derived_files_regenerated is true, then we have to
+ // rebuild qualified id join index since NamespaceIds were reassigned.
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// RESOURCE_EXHAUSTED if the index runs out of storage
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeIndex()
+ libtextclassifier3::Status InitializeIndex(
+ bool document_store_derived_files_regenerated,
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Implementation of IcingSearchEngine::Search that only grabs the overall
+ // read-lock, allowing for parallel non-exclusive operations.
+ // This implementation is used if search_spec.use_read_only_search is true.
+ SearchResultProto SearchLockedShared(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Implementation of IcingSearchEngine::Search that requires the overall
+ // write lock. No other operations of any kind can be executed in parallel if
+ // this version is used.
+ // This implementation is used if search_spec.use_read_only_search is false.
+ SearchResultProto SearchLockedExclusive(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Helper method for the actual work to Search. We need this separate
+ // method to manage locking for Search.
+ SearchResultProto InternalSearch(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
+
+ // Processes the query and scores results according to the specs. It is a
+ // helper function (called by Search) to process and score the normal query
+ // and the nested child query for join search.
+ //
+ // Returns a QueryScoringResults with:
+ //   OK on success, along with a vector of ScoredDocumentHits, a
+ //   SectionRestrictQueryTermsMap, and other stats fields for logging.
+ //   Any other errors encountered while processing the query or scoring.
+ struct QueryScoringResults {
+ libtextclassifier3::Status status;
+ SectionRestrictQueryTermsMap query_terms;
+ std::vector<ScoredDocumentHit> scored_document_hits;
+
+ explicit QueryScoringResults(
+ libtextclassifier3::Status status_in,
+ SectionRestrictQueryTermsMap&& query_terms_in,
+ std::vector<ScoredDocumentHit>&& scored_document_hits_in)
+ : status(std::move(status_in)),
+ query_terms(std::move(query_terms_in)),
+ scored_document_hits(std::move(scored_document_hits_in)) {}
+ };
+ QueryScoringResults ProcessQueryAndScore(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats)
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
+
// Many of the internal components rely on other components' derived data.
// Check that everything is consistent with each other so that we're not
// using outdated derived data in some parts of our system.
@@ -469,12 +641,23 @@ class IcingSearchEngine {
libtextclassifier3::Status CheckConsistency()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Discards all derived data.
+ //
+ // Returns:
+ // OK on success
+ // FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr)
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status DiscardDerivedFiles()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Repopulates derived data off our ground truths.
//
// Returns:
// OK on success
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RegenerateDerivedFiles()
+ libtextclassifier3::Status RegenerateDerivedFiles(
+ InitializeStatsProto* initialize_stats = nullptr,
+ bool log_document_store_stats = false)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Optimizes the DocumentStore by removing any unneeded documents (i.e.
@@ -484,44 +667,41 @@ class IcingSearchEngine {
// would need to call Initialize() to reinitialize everything into a valid state.
//
// Returns:
- // OK on success
+ // On success, OptimizeResult which contains a vector mapping from old
+ // document id to new document id and another vector mapping from old
+ // namespace id to new namespace id. A value of kInvalidDocumentId indicates
+ // that the old document id has been deleted.
// ABORTED_ERROR if any error happens before the actual optimization, the
// original document store should be still available
// DATA_LOSS_ERROR on errors that could potentially cause data loss,
// document store is still available
// INTERNAL_ERROR on any IO errors or other errors that we can't recover
// from
- libtextclassifier3::Status OptimizeDocumentStore()
+ libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+ OptimizeDocumentStore(OptimizeStatsProto* optimize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
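
// For example, an OptimizeResult whose old-to-new document id mapping reads
// {0, kInvalidDocumentId, 1} (the exact field name is an assumption) would
// mean document 0 kept its id, document 1 was deleted, and document 2 was
// compacted to id 1.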
- // Helper method to restore missing document data in index_. All documents
- // will be reindexed. This does not clear the index, so it is recommended to
- // call Index::Reset first.
+ // Helper method to restore missing document data in index_, integer_index_,
+ // and qualified_id_join_index_. All documents will be reindexed. This does
+ // not clear the index, so it is recommended to call ClearAllIndices,
+ // ClearSearchIndices, or ClearJoinIndices first if needed.
//
// Returns:
- // OK on success
+ // On success, OK and a bool indicating whether or not restoration was
+ // needed.
+ // DATA_LOSS, if an error during index merging caused us to lose indexed
+ // data in the main index. Despite the data loss, this is still considered
+ // a successful run and needed_restoration will be set to true.
// RESOURCE_EXHAUSTED if the index fills up before finishing indexing
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RestoreIndex()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Computes the combined checksum of the IcingSearchEngine - includes all its
- // subcomponents
- //
- // Returns:
- // Combined checksum on success
- // INTERNAL_ERROR on compute error
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Checks if the header exists already. This does not create the header file
- // if it doesn't exist.
- bool HeaderExists() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
- libtextclassifier3::Status UpdateHeader(const Crc32& checksum)
+ struct IndexRestorationResult {
+ libtextclassifier3::Status status;
+ bool index_needed_restoration;
+ bool integer_index_needed_restoration;
+ bool qualified_id_join_index_needed_restoration;
+ };
+ IndexRestorationResult RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// If we lost the schema during a previous failure, it may "look" the same as
@@ -535,6 +715,70 @@ class IcingSearchEngine {
// INTERNAL_ERROR on I/O error
libtextclassifier3::StatusOr<bool> LostPreviousSchema()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to create all types of data indexing handlers to index term,
+ // integer, and join qualified ids.
+ libtextclassifier3::StatusOr<
+ std::vector<std::unique_ptr<DataIndexingHandler>>>
+ CreateDataIndexingHandlers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard parts of (term, integer, qualified id join)
+ // indices if they contain data for document ids greater than
+ // last_stored_document_id.
+ //
+ // REQUIRES: last_stored_document_id is valid (!= kInvalidDocumentId). Note:
+ // if we want to truncate everything in the index, then please call
+ // ClearSearchIndices/ClearJoinIndices/ClearAllIndices instead.
+ //
+ // Returns:
+ //   On success, a DocumentId indicating the first document to reindex and
+ //   three bool flags indicating whether the term, integer, or qualified id
+ //   join index needs restoration.
+ // INTERNAL on any I/O errors
+ struct TruncateIndexResult {
+ DocumentId first_document_to_reindex;
+ bool index_needed_restoration;
+ bool integer_index_needed_restoration;
+ bool qualified_id_join_index_needed_restoration;
+
+ explicit TruncateIndexResult(
+ DocumentId first_document_to_reindex_in,
+ bool index_needed_restoration_in,
+ bool integer_index_needed_restoration_in,
+ bool qualified_id_join_index_needed_restoration_in)
+ : first_document_to_reindex(first_document_to_reindex_in),
+ index_needed_restoration(index_needed_restoration_in),
+ integer_index_needed_restoration(integer_index_needed_restoration_in),
+ qualified_id_join_index_needed_restoration(
+ qualified_id_join_index_needed_restoration_in) {}
+ };
+ libtextclassifier3::StatusOr<TruncateIndexResult> TruncateIndicesTo(
+ DocumentId last_stored_document_id)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard search (term, integer) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearSearchIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard join (qualified id) indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearJoinIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Helper method to discard all search and join indices.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status ClearAllIndices()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
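+
+  // Illustration only: assuming ClearAllIndices() simply composes the two
+  // helpers above (an assumption, not confirmed by this change), its expected
+  // shape would be:
+  //
+  //   libtextclassifier3::Status ClearAllIndices() {
+  //     ICING_RETURN_IF_ERROR(ClearSearchIndices());
+  //     return ClearJoinIndices();
+  //   }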
};
} // namespace lib
diff --git a/icing/icing-search-engine_backwards_compatibility_test.cc b/icing/icing-search-engine_backwards_compatibility_test.cc
new file mode 100644
index 0000000..178e923
--- /dev/null
+++ b/icing/icing-search-engine_backwards_compatibility_test.cc
@@ -0,0 +1,569 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+class IcingSearchEngineBackwardsCompatibilityTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+std::string GetTestDataDir(std::string_view test_subdir) {
+ if (IsAndroidX86()) {
+ return GetTestFilePath(
+ absl_ports::StrCat("icing/testdata/", test_subdir,
+ "/icing_search_engine_android_x86"));
+ } else if (IsAndroidArm()) {
+ return GetTestFilePath(
+ absl_ports::StrCat("icing/testdata/", test_subdir,
+ "/icing_search_engine_android_arm"));
+ } else if (IsIosPlatform()) {
+ return GetTestFilePath(absl_ports::StrCat("icing/testdata/",
+ test_subdir,
+ "/icing_search_engine_ios"));
+ } else {
+ return GetTestFilePath(absl_ports::StrCat("icing/testdata/",
+ test_subdir,
+ "/icing_search_engine_linux"));
+ }
+}
+
+TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
+ MigrateToPortableFileBackedProtoLog) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string dir_without_portable_log = GetTestDataDir("not_portable_log");
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(dir_without_portable_log.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+  // Since there will be a version change, the recovery cause will be
+ // VERSION_CHANGED.
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+  // Set up the schema; this is the one used to validate documents in the
+  // testdata files. Do not change unless you're also updating the testdata
+  // files.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Make sure our schema is still the same as we expect. If not, there's
+ // definitely no way we're getting the documents back that we expect.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+  // Documents 1 and 3 were put normally, and document 2 was deleted in our
+ // testdata files.
+ EXPECT_THAT(icing
+ .Get(document1.namespace_(), document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document1));
+ EXPECT_THAT(icing
+ .Get(document2.namespace_(), document2.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing
+ .Get(document3.namespace_(), document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document3));
+
+ // Searching for "foo" should get us document1.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+
+ SearchResultProto expected_document1;
+ expected_document1.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1.mutable_results()->Add()->mutable_document() = document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document1));
+
+ // Searching for "baz" would've gotten us document2, except it got deleted.
+ // Make sure that it's cleared from our index too.
+ search_spec.set_query("baz");
+
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+
+ // Searching for "phoo" should get us document3.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document3;
+ expected_document3.mutable_status()->set_code(StatusProto::OK);
+ *expected_document3.mutable_results()->Add()->mutable_document() = document3;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document3));
+}
+
+TEST_F(IcingSearchEngineBackwardsCompatibilityTest, MigrateToLargerScale) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string test_data_dir = GetTestDataDir("icing_scale_migration");
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(test_data_dir.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+  // Since there will be a version change, the recovery cause will be
+ // VERSION_CHANGED.
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+ // Verify that the schema stored in the index matches the one that we expect.
+ // Do not change unless you're also updating the testdata files.
+ SchemaProto expected_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Make sure our schema is still the same as we expect. If not, there's
+ // definitely no way we're getting the documents back that we expect.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = expected_schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto expected_document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto expected_deleted_document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto expected_document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+  // Documents 1 and 3 were put normally, and document 2 was deleted in our
+ // testdata files.
+ EXPECT_THAT(
+ icing
+ .Get(expected_document1.namespace_(), expected_document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document1));
+ EXPECT_THAT(icing
+ .Get(expected_deleted_document2.namespace_(),
+ expected_deleted_document2.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document3.namespace_(), expected_document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document3));
+
+ // Searching for "foo" should get us document1.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+
+ SearchResultProto expected_document1_search;
+ expected_document1_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1_search.mutable_results()->Add()->mutable_document() =
+ expected_document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document1_search));
+
+ // Searching for "baz" would've gotten us document2, except it got deleted.
+ // Make sure that it's cleared from our index too.
+ search_spec.set_query("baz");
+
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+
+ // Searching for "phoo" should get us document3.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document3_search;
+ expected_document3_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document3_search.mutable_results()->Add()->mutable_document() =
+ expected_document3;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document3_search));
+}
+
+TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
+ MigrateToAppendOnlySchemaStorage) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string test_data_dir = GetTestDataDir("blob_schema_store");
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(test_data_dir.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+  // Since there will be a version change, the recovery cause will be
+ // VERSION_CHANGED.
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ // TODO: create enum code for legacy schema store recovery after schema store
+ // change is made.
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+ // Verify that the schema stored in the index matches the one that we expect.
+ // Do not change unless you're also updating the testdata files.
+ SchemaProto expected_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedStringProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedIntegerProperty")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableIntegerProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringExactProperty")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringPrefixProperty")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = expected_schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto expected_document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto expected_document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ DocumentProto expected_document3 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("stringExactProperty", "foo")
+ .AddInt64Property("indexableIntegerProperty", 10)
+ .Build();
+
+ EXPECT_THAT(
+ icing
+ .Get(expected_document1.namespace_(), expected_document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document1));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document2.namespace_(), expected_document2.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document2));
+ EXPECT_THAT(
+ icing
+ .Get(expected_document3.namespace_(), expected_document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(expected_document3));
+
+ // Searching for "foo" should get us document1 and not document3 due to the
+ // schema type filter.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+ search_spec.add_schema_type_filters("email");
+
+ SearchResultProto expected_document1_search;
+ expected_document1_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1_search.mutable_results()->Add()->mutable_document() =
+ expected_document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document1_search));
+
+ // Searching for "phoo" should get us document2.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document2_search;
+ expected_document2_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document2_search.mutable_results()->Add()->mutable_document() =
+ expected_document2;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document2_search));
+
+ // Searching for "foo" should get us both document 1 and document3 now that
+ // schema type 'transaction' has been added to the schema filter.
+ search_spec.set_query("foo");
+ search_spec.add_schema_type_filters("transaction");
+
+ SearchResultProto expected_document_1_and_3_search;
+ expected_document_1_and_3_search.mutable_status()->set_code(StatusProto::OK);
+ *expected_document_1_and_3_search.mutable_results()
+ ->Add()
+ ->mutable_document() = expected_document3;
+ *expected_document_1_and_3_search.mutable_results()
+ ->Add()
+ ->mutable_document() = expected_document1;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_document_1_and_3_search));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index a6d96e0..18c6bb9 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -16,7 +16,9 @@
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
+#include <numeric>
#include <ostream>
#include <random>
#include <sstream>
@@ -32,17 +34,23 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
+#include "icing/join/join-processor.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/document-generator.h"
+#include "icing/testing/numeric/number-generator.h"
+#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
#include "icing/testing/random-string.h"
-#include "icing/testing/recorder-test-utils.h"
#include "icing/testing/schema-generator.h"
#include "icing/testing/tmp-directory.h"
@@ -51,7 +59,7 @@
// //icing:icing-search-engine_benchmark
//
// $ blaze-bin/icing/icing-search-engine_benchmark
-// --benchmarks=all --benchmark_memory_usage
+// --benchmark_filter=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -61,7 +69,8 @@
// $ adb push blaze-bin/icing/icing-search-engine_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/icing-search-engine_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/icing-search-engine_benchmark
+// --benchmark_filter=all
namespace icing {
namespace lib {
@@ -69,6 +78,7 @@ namespace lib {
namespace {
using ::testing::Eq;
+using ::testing::HasSubstr;
// Icing GMSCore has, on average, 17 corpora on a device and 30 corpora at the
// 95th pct. Most clients use a single type. This is a function of Icing's
@@ -87,14 +97,6 @@ constexpr int kAvgDocumentSize = 300;
// ASSUME: ~70% of the document's size comes from its content.
constexpr float kContentSizePct = 0.7;
-// Average length of word in English is 4.7 characters.
-constexpr int kAvgTokenLen = 5;
-// Made up value. This results in a fairly reasonable language - the majority of
-// generated words are 3-9 characters, ~3% of words are >=20 chars, and the
-// longest ones are 27 chars, (roughly consistent with the longest,
-// non-contrived English words
-// https://en.wikipedia.org/wiki/Longest_word_in_English)
-constexpr int kTokenStdDev = 7;
constexpr int kLanguageSize = 1000;
// Lite Index size required to fit 128k docs, each doc requires ~64 bytes of
@@ -114,22 +116,6 @@ std::vector<std::string> CreateNamespaces(int num_namespaces) {
return namespaces;
}
-// Creates a vector containing num_words randomly-generated words for use by
-// documents.
-template <typename Rand>
-std::vector<std::string> CreateLanguage(int num_words, Rand* r) {
- std::vector<std::string> language;
- std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
- while (--num_words >= 0) {
- int word_length = 0;
- while (word_length < 1) {
- word_length = std::round(norm_dist(*r));
- }
- language.push_back(RandomString(kAlNumAlphabet, word_length, r));
- }
- return language;
-}
-
SearchSpecProto CreateSearchSpec(const std::string& query,
const std::vector<std::string>& namespaces,
TermMatchType::Code match_type) {
@@ -175,6 +161,202 @@ class DestructibleDirectory {
std::string dir_;
};
+std::vector<DocumentProto> GenerateRandomDocuments(
+ EvenDistributionTypeSelector* type_selector, int num_docs,
+ const std::vector<std::string>& language) {
+ std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
+ EvenDistributionNamespaceSelector namespace_selector(namespaces);
+
+ std::default_random_engine random;
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>
+ token_generator(language, &random);
+
+ DocumentGenerator<
+ EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
+ generator(&namespace_selector, type_selector, &token_generator,
+ kAvgDocumentSize * kContentSizePct);
+
+ std::vector<DocumentProto> random_docs;
+ random_docs.reserve(num_docs);
+ for (int i = 0; i < num_docs; i++) {
+ random_docs.push_back(generator.generateDoc());
+ }
+ return random_docs;
+}
+
+std::unique_ptr<NumberGenerator<int64_t>> CreateIntegerGenerator(
+ size_t num_documents) {
+ // Since the collision # follows poisson distribution with lambda =
+ // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too many
+ // collisions.
+ //
+ // Distribution:
+ // - keys in range being picked for 0 times: 90.5%
+ // - keys in range being picked for 1 time: 9%
+ // - keys in range being picked for 2 times: 0.45%
+ // - keys in range being picked for 3 times: 0.015%
+ //
+  // For example, num_keys = 1M, range = 10M. Then ~904837 keys will be picked
+  // exactly once, ~45242 keys twice, ~1508 keys three times, and so on.
+ return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
+ /*seed=*/12345, /*range_lower=*/0,
+ /*range_upper=*/static_cast<int64_t>(num_documents) * 10 - 1);
+}
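+
+// Illustration only: the percentages quoted above follow from the Poisson pmf
+// P(k) = e^-lambda * lambda^k / k! with lambda = num_keys / range = 0.1. A
+// quick standalone check (hypothetical helper, not used by any benchmark):
+//
+//   #include <cmath>
+//   double PoissonPmf(double lambda, int k) {
+//     double p = std::exp(-lambda);  // P(0) = e^-lambda
+//     for (int i = 1; i <= k; ++i) p *= lambda / i;  // P(k) = P(k-1)*lambda/k
+//     return p;
+//   }
+//   // PoissonPmf(0.1, 0) ~= 0.9048   -> ~90.5% of keys never picked
+//   // PoissonPmf(0.1, 1) ~= 0.0905   -> ~9% picked once (10M * P(1) ~= 904837)
+//   // PoissonPmf(0.1, 2) ~= 0.00452  -> ~0.45% picked twice (~45242 keys)
+//   // PoissonPmf(0.1, 3) ~= 0.000151 -> ~0.015% picked thrice (~1508 keys)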
+
+void BM_IndexLatency(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ int num_docs = state.range(0);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs, language);
+ for (auto _ : state) {
+ state.PauseTiming();
+ ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+ state.ResumeTiming();
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+ }
+}
+BENCHMARK(BM_IndexLatency)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1000000, 5);
+
+void BM_QueryLatency(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ int num_docs = state.range(0);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs, language);
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec = CreateSearchSpec(
+ language.at(0), std::vector<std::string>(), TermMatchType::PREFIX);
+ ResultSpecProto result_spec = CreateResultSpec(1, 1000000, 1000000);
+ ScoringSpecProto scoring_spec =
+ CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ for (auto _ : state) {
+    SearchResultProto results =
+        icing->Search(search_spec, scoring_spec, result_spec);
+ }
+}
+BENCHMARK(BM_QueryLatency)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1000000, 2);
+
+void BM_IndexThroughput(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ int num_docs = state.range(0);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs, language);
+ for (auto s : state) {
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+ }
+ state.SetItemsProcessed(state.iterations() * num_docs);
+}
+BENCHMARK(BM_IndexThroughput)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1, 1)
+ ->ArgPair(2, 1)
+ ->ArgPair(8, 1)
+ ->ArgPair(32, 1)
+ ->ArgPair(128, 1)
+ ->ArgPair(1 << 10, 1)
+ ->ArgPair(1 << 13, 1)
+ ->ArgPair(1 << 15, 1)
+ ->ArgPair(1 << 17, 1)
+ ->ArgPair(1, 5)
+ ->ArgPair(2, 5)
+ ->ArgPair(8, 5)
+ ->ArgPair(32, 5)
+ ->ArgPair(128, 5)
+ ->ArgPair(1 << 10, 5)
+ ->ArgPair(1 << 13, 5)
+ ->ArgPair(1 << 15, 5)
+ ->ArgPair(1 << 17, 5)
+ ->ArgPair(1, 10)
+ ->ArgPair(2, 10)
+ ->ArgPair(8, 10)
+ ->ArgPair(32, 10)
+ ->ArgPair(128, 10)
+ ->ArgPair(1 << 10, 10)
+ ->ArgPair(1 << 13, 10)
+ ->ArgPair(1 << 15, 10)
+ ->ArgPair(1 << 17, 10);
+
void BM_MutlipleIndices(benchmark::State& state) {
// Initialize the filesystem
std::string test_dir = GetTestTempDir() + "/icing/benchmark";
@@ -202,11 +384,8 @@ void BM_MutlipleIndices(benchmark::State& state) {
options.set_index_merge_size(kIcingFullIndexSize / num_indices);
auto icing = std::make_unique<IcingSearchEngine>(options);
- InitializeResultProto init_result = icing->Initialize();
- ASSERT_THAT(init_result.status().code(), Eq(StatusProto::OK));
-
- SetSchemaResultProto schema_result = icing->SetSchema(schema);
- ASSERT_THAT(schema_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
icings.push_back(std::move(icing));
}
@@ -214,7 +393,7 @@ void BM_MutlipleIndices(benchmark::State& state) {
std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
EvenDistributionNamespaceSelector namespace_selector(namespaces);
- std::vector<std::string> language = CreateLanguage(kLanguageSize, &random);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
UniformDistributionLanguageTokenGenerator<std::default_random_engine>
token_generator(language, &random);
@@ -231,8 +410,7 @@ void BM_MutlipleIndices(benchmark::State& state) {
ASSERT_THAT(put_result.status().code(), Eq(StatusProto::UNKNOWN));
continue;
}
- put_result = icings.at(i % icings.size())->Put(doc);
- ASSERT_THAT(put_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icings.at(i % icings.size())->Put(doc).status(), ProtoIsOk());
}
// QUERY!
@@ -255,13 +433,13 @@ void BM_MutlipleIndices(benchmark::State& state) {
continue;
}
result = icings.at(0)->Search(search_spec, scoring_spec, result_spec);
- ASSERT_THAT(result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(result.status(), ProtoIsOk());
while (!result.results().empty()) {
num_results += result.results_size();
if (!icings.empty()) {
result = icings.at(0)->GetNextPage(result.next_page_token());
}
- ASSERT_THAT(result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(result.status(), ProtoIsOk());
}
}
@@ -307,6 +485,781 @@ BENCHMARK(BM_MutlipleIndices)
->ArgPair(10, 32768)
->ArgPair(10, 131072);
+void BM_SearchNoStackOverflow(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document that has the term "foo"
+ DocumentProto base_document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .AddStringProperty("body", "foo")
+ .Build();
+
+ // Insert a lot of documents with the term "foo"
+ int64_t num_docs = state.range(0);
+ for (int64_t i = 0; i < num_docs; ++i) {
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+
+ // Do a query and exclude documents with the term "foo". The way this is
+ // currently implemented is that we'll iterate over all the documents in the
+ // index, then apply the exclusion check. Since all our documents have "foo",
+ // we'll consider it a "miss". Previously with recursion, we would have
+// recursed until we got a success, which would never happen, causing us to
+ // recurse through all the documents and trigger a stack overflow. With
+ // the iterative implementation, we should avoid this.
+ SearchSpecProto search_spec;
+ search_spec.set_query("-foo");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ ResultSpecProto result_spec;
+ ScoringSpecProto scoring_spec;
+ for (auto s : state) {
+ icing->Search(search_spec, scoring_spec, result_spec);
+ }
+}
+// For other reasons, we hit a limit when inserting the ~350,000th document. So
+// cap the limit to 1 << 18.
+BENCHMARK(BM_SearchNoStackOverflow)
+ ->Range(/*start=*/1 << 10, /*limit=*/1 << 18);
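+
+// Illustration only; DocIterator and its methods are hypothetical names, not
+// Icing's actual iterator API. This sketches the difference between the old
+// recursive advance and the iterative one that BM_SearchNoStackOverflow
+// exercises.
+//
+//   // Recursive: one stack frame per excluded document. When every document
+//   // is a miss (as with the "-foo" query above), recursion depth equals the
+//   // corpus size and can overflow the stack.
+//   bool AdvanceRecursive(DocIterator* it) {
+//     if (!it->Next()) return false;            // exhausted
+//     if (it->IsExcluded()) return AdvanceRecursive(it);
+//     return true;                              // found a hit
+//   }
+//
+//   // Iterative: constant stack usage no matter how many misses occur.
+//   bool AdvanceIterative(DocIterator* it) {
+//     while (it->Next()) {
+//       if (!it->IsExcluded()) return true;     // found a hit
+//     }
+//     return false;                             // exhausted
+//   }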
+
+// Added for b/184373205. Ensure that we can repeatedly put documents even if
+// the underlying mmapped areas grow past a few page sizes.
+void BM_RepeatedPut(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document that has the term "foo"
+ DocumentProto base_document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .AddStringProperty("body", "foo")
+ .Build();
+
+ // Insert a lot of documents with the term "foo"
+ int64_t num_docs = state.range(0);
+ for (auto s : state) {
+ for (int64_t i = 0; i < num_docs; ++i) {
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri("uri").Build();
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+ }
+}
+// For other reasons, we hit a limit when inserting the ~350,000th document. So
+// cap the limit to 1 << 18.
+BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
+
+// This is different from BM_RepeatedPut since we're just trying to benchmark
+// one Put call, not thousands of them at once.
+void BM_Put(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(icing->Put(document));
+ }
+}
+BENCHMARK(BM_Put);
+
+void BM_Get(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ for (auto s : state) {
+ benchmark::DoNotOptimize(
+ icing->Get("namespace", "uri", GetResultSpecProto::default_instance()));
+ }
+}
+BENCHMARK(BM_Get);
+
+void BM_Delete(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ for (auto s : state) {
+ state.PauseTiming();
+ icing->Put(document);
+ state.ResumeTiming();
+
+ benchmark::DoNotOptimize(icing->Delete("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_Delete);
+
+void BM_PutMaxAllowedDocuments(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document that has the term "foo"
+ DocumentProto base_document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .AddStringProperty("body", "foo")
+ .Build();
+
+ // Insert a lot of documents with the term "foo"
+ for (auto s : state) {
+ for (int64_t i = 0; i <= kMaxDocumentId; ++i) {
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
+ EXPECT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+ }
+
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri("out_of_space_uri").Build();
+ PutResultProto put_result_proto = icing->Put(document);
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+ EXPECT_THAT(put_result_proto.status().message(),
+ HasSubstr("Exceeded maximum number of documents"));
+}
+BENCHMARK(BM_PutMaxAllowedDocuments);
+
+void BM_QueryWithSnippet(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::string body = "message body";
+ for (int i = 0; i < 100; i++) {
+ body = body +
+ " invent invention inventory invest investigate investigation "
+ "investigator investment nvestor invisible invitation invite "
+ "involve involved involvement IraqiI rish island";
+ }
+ for (int i = 0; i < 50; i++) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", body)
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("i");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(10000);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(10000);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(10000);
+
+ for (auto s : state) {
+ SearchResultProto results = icing->Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ }
+}
+BENCHMARK(BM_QueryWithSnippet);
+
+void BM_NumericIndexing(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ std::vector<DocumentProto> documents;
+ documents.reserve(num_documents);
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ integers.push_back(integer_generator->Generate());
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ documents.push_back(std::move(document));
+ }
+
+ for (auto s : state) {
+ state.PauseTiming();
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+ state.ResumeTiming();
+
+ for (const DocumentProto& document : documents) {
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+
+ state.PauseTiming();
+ icing.reset();
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
+ state.ResumeTiming();
+ }
+}
+
+BENCHMARK(BM_NumericIndexing)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_NumericExactQuery(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ std::unordered_set<int64_t> chosen_integer_set;
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ int64_t chosen_int = integer_generator->Generate();
+ integers.push_back(chosen_int);
+ chosen_integer_set.insert(chosen_int);
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ std::vector<int64_t> chosen_integers(chosen_integer_set.begin(),
+ chosen_integer_set.end());
+ std::uniform_int_distribution<> distrib(0, chosen_integers.size() - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (auto s : state) {
+ int64_t exact = chosen_integers[distrib(e)];
+ search_spec.set_query("integer == " + std::to_string(exact));
+
+ SearchResultProto results =
+ icing->Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.status(), ProtoIsOk());
+ ASSERT_GT(results.results_size(), 0);
+ if (results.next_page_token() != kInvalidNextPageToken) {
+ icing->InvalidateNextPageToken(results.next_page_token());
+ }
+ }
+}
+BENCHMARK(BM_NumericExactQuery)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_NumericRangeQueryAll(benchmark::State& state) {
+ int num_documents = state.range(0);
+ int num_integers_per_doc = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("integer")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
+ CreateIntegerGenerator(num_documents);
+ for (int i = 0; i < num_documents; ++i) {
+ std::vector<int64_t> integers;
+ integers.reserve(num_integers_per_doc);
+ for (int j = 0; j < num_integers_per_doc; ++j) {
+ integers.push_back(integer_generator->Generate());
+ }
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "body hello world")
+ .AddInt64Property("integer", integers.begin(), integers.end())
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
+ }
+
+ SearchSpecProto search_spec;
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+ search_spec.set_query("integer >= " +
+ std::to_string(std::numeric_limits<int64_t>::min()));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ for (auto s : state) {
+ SearchResultProto results =
+ icing->Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.status(), ProtoIsOk());
+ ASSERT_GT(results.results_size(), 0);
+ if (results.next_page_token() != kInvalidNextPageToken) {
+ icing->InvalidateNextPageToken(results.next_page_token());
+ }
+ }
+}
+BENCHMARK(BM_NumericRangeQueryAll)
+ // Arguments: num_documents, num_integers_per_doc
+ ->ArgPair(1000000, 5);
+
+void BM_JoinQueryQualifiedId(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(true);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create Person documents (parent)
+ static constexpr int kNumPersonDocuments = 1000;
+ for (int i = 0; i < kNumPersonDocuments; ++i) {
+ std::string person_id = std::to_string(i);
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person" + person_id)
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first" + person_id)
+ .AddStringProperty("lastName", "last" + person_id)
+ .AddStringProperty("emailAddress",
+ "person" + person_id + "@gmail.com")
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(person)).status(), ProtoIsOk());
+ }
+
+ // Create Email documents (child)
+ static constexpr int kNumEmailDocuments = 1000;
+ std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (int i = 0; i < kNumEmailDocuments; ++i) {
+ std::string email_id = std::to_string(i);
+ std::string person_id = std::to_string(distrib(e));
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email" + email_id)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject " + email_id)
+ .AddStringProperty("body", "message body")
+ .AddStringProperty("personQualifiedId",
+ "pkg$db/namespace#person" + person_id)
+ .Build();
+ ASSERT_THAT(icing->Put(std::move(email)).status(), ProtoIsOk());
+ }
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+
+ // JoinSpec
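+  // Joins Email (child) documents to the Person (parent) documents they
+  // reference: each Email's "personQualifiedId" is matched against the
+  // parent's qualified id, and child scores are aggregated with MAX.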
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ *nested_spec->mutable_scoring_spec() = ScoringSpecProto::default_instance();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ static constexpr int kNumPerPage = 10;
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(kNumPerPage);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ ScoringSpecProto score_spec = ScoringSpecProto::default_instance();
+
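+  // Left fold that sums the number of joined child documents across the
+  // parent results on a page. Used with std::accumulate below, whose strict
+  // left-to-right fold matches the lambda's (accumulator, result) signature.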
+ const auto child_count_reduce_func =
+ [](int child_count, const SearchResultProto::ResultProto& result) -> int {
+ return child_count + result.joined_results_size();
+ };
+ for (auto s : state) {
+ int total_parent_count = 0;
+ int total_child_count = 0;
+ SearchResultProto results =
+ icing->Search(search_spec, score_spec, result_spec);
+ total_parent_count += results.results_size();
+    total_child_count +=
+        std::accumulate(results.results().begin(), results.results().end(), 0,
+                        child_count_reduce_func);
+
+ ASSERT_THAT(total_parent_count, Eq(kNumPerPage));
+ ASSERT_THAT(total_child_count, ::testing::Ge(0));
+ }
+}
+BENCHMARK(BM_JoinQueryQualifiedId);
+
+void BM_PersistToDisk(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Generate documents.
+ int num_docs = state.range(0);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs, language);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ options.set_use_persistent_hash_map(true);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+
+ state.ResumeTiming();
+
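+    // Only the FULL PersistToDisk call below is timed; engine setup above and
+    // teardown below are excluded via PauseTiming()/ResumeTiming().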
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ state.PauseTiming();
+ icing.reset();
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_PersistToDisk)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1024, 5);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_delete_test.cc b/icing/icing-search-engine_delete_test.cc
new file mode 100644
index 0000000..c3b1ccd
--- /dev/null
+++ b/icing/icing-search-engine_delete_test.cc
@@ -0,0 +1,768 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Return;
+using ::testing::SizeIs;
+using ::testing::StrEq;
+using ::testing::UnorderedElementsAre;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test fixture covers all tests relating to IcingSearchEngine::Delete*.
+class IcingSearchEngineDeleteTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteBySchemaType) {
+ SchemaProto schema;
+ // Add an email type
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+  // Add a message type
+ type = schema.add_types();
+ type->set_schema_type("message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first type. The first doc should be irretrievable. The
+ // second should still be present.
+ DeleteBySchemaTypeResultProto result_proto =
+ icing.DeleteBySchemaType("message");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Search for "message", only document2 should show up.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("message");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteSchemaTypeByQuery) {
+ SchemaProto schema = CreateMessageSchema();
+ // Add an email type
+ SchemaProto tmp = CreateEmailSchema();
+ *schema.add_types() = tmp.types(0);
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema(schema.types(0).schema_type())
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema(schema.types(1).schema_type())
+ .AddStringProperty("subject", "subject subject2")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first type. The first doc should be irretrievable. The
+ // second should still be present.
+ SearchSpecProto search_spec;
+ search_spec.add_schema_type_filters(schema.types(0).schema_type());
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByNamespace) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete namespace1. Document1 and document2 should be irretrievable.
+ // Document3 should still be present.
+ DeleteByNamespaceResultProto result_proto =
+ icing.DeleteByNamespace("namespace1");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(2);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri2) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Search for "message", only document3 should show up.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("message");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteNamespaceByQuery) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first namespace. The first doc should be irretrievable. The
+ // second should still be present.
+ SearchSpecProto search_spec;
+ search_spec.add_namespace_filters("namespace1");
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQuery) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete all docs containing 'body1'. The first doc should be irretrievable.
+ // The second should still be present.
+ SearchSpecProto search_spec;
+ search_spec.set_query("body1");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteByQueryStatsProto exp_stats;
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ exp_stats.set_query_length(search_spec.query().length());
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_parse_query_latency_ms(7);
+ exp_stats.set_document_removal_latency_ms(7);
+ EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryReturnInfo) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete all docs to test the information is correctly grouped.
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
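+  // Passing true asks DeleteByQuery to also report which documents were
+  // deleted, grouped by namespace and schema type.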
+ DeleteByQueryResultProto result_proto =
+ icing.DeleteByQuery(search_spec, true);
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteByQueryStatsProto exp_stats;
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(3);
+ exp_stats.set_query_length(search_spec.query().length());
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_parse_query_latency_ms(7);
+ exp_stats.set_document_removal_latency_ms(7);
+ EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
+
+ // Check that DeleteByQuery can return information for deleted documents.
+ DeleteByQueryResultProto::DocumentGroupInfo info1, info2;
+ info1.set_namespace_("namespace1");
+ info1.set_schema("Message");
+ info1.add_uris("uri1");
+ info2.set_namespace_("namespace2");
+ info2.set_schema("Message");
+ info2.add_uris("uri3");
+ info2.add_uris("uri2");
+ EXPECT_THAT(result_proto.deleted_documents(),
+ UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2)));
+
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryNotFound) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete all docs containing 'foo', which should be none of them. Both docs
+ // should still be present.
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ search_spec = SearchSpecProto::default_instance();
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_flush_benchmark.cc b/icing/icing-search-engine_flush_benchmark.cc
new file mode 100644
index 0000000..3196ef6
--- /dev/null
+++ b/icing/icing-search-engine_flush_benchmark.cc
@@ -0,0 +1,199 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <random>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/document-generator.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/schema-generator.h"
+#include "icing/testing/tmp-directory.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing:icing-search-engine_flush_benchmark
+//
+// $ blaze-bin/icing/icing-search-engine_flush_benchmark
+// --benchmark_filter=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing:icing-search-engine_flush_benchmark
+//
+// $ adb push blaze-bin/icing/icing-search-engine_flush_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark
+// --benchmark_filter=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Assume that there will be roughly 10 packages, each using 3 of its own types.
+constexpr int kAvgNumNamespaces = 10;
+constexpr int kAvgNumTypes = 3;
+
+// ASSUME: Types will have at most ten properties. Types will be created with
+// [1, 10] properties.
+constexpr int kMaxNumProperties = 10;
+
+// Based on logs from Icing GMSCore.
+constexpr int kAvgDocumentSize = 300;
+
+// ASSUME: ~70% of the document's size comes from its content.
+constexpr float kContentSizePct = 0.7;
+
+// Average length of word in English is 4.7 characters.
+constexpr int kAvgTokenLen = 5;
+// Made up value. This results in a fairly reasonable language - the majority of
+// generated words are 3-9 characters, ~3% of words are >=20 chars, and the
+// longest ones are 27 chars (roughly consistent with the longest,
+// non-contrived English words:
+// https://en.wikipedia.org/wiki/Longest_word_in_English).
+constexpr int kTokenStdDev = 7;
+constexpr int kLanguageSize = 1000;
+
+// The number of documents to index.
+constexpr int kNumDocuments = 1024;
+
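+// Generates num_namespaces package-style namespace names of the form
+// "comgooglepackage<N>".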
+std::vector<std::string> CreateNamespaces(int num_namespaces) {
+ std::vector<std::string> namespaces;
+ while (--num_namespaces >= 0) {
+ namespaces.push_back("comgooglepackage" + std::to_string(num_namespaces));
+ }
+ return namespaces;
+}
+
+// Creates a vector containing num_words randomly-generated words for use by
+// documents.
+template <typename Rand>
+std::vector<std::string> CreateLanguage(int num_words, Rand* r) {
+ std::vector<std::string> language;
+ std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
+ while (--num_words >= 0) {
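+    // Draw a word length from the normal distribution, redrawing until it is
+    // at least one character.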
+ int word_length = 0;
+ while (word_length < 1) {
+ word_length = std::round(norm_dist(*r));
+ }
+ language.push_back(RandomString(kAlNumAlphabet, word_length, r));
+ }
+ return language;
+}
+
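+// RAII helper that recursively creates the directory on construction and
+// recursively deletes it on destruction.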
+class DestructibleDirectory {
+ public:
+ explicit DestructibleDirectory(const Filesystem& filesystem,
+ const std::string& dir)
+ : filesystem_(filesystem), dir_(dir) {
+ filesystem_.CreateDirectoryRecursively(dir_.c_str());
+ }
+ ~DestructibleDirectory() {
+ filesystem_.DeleteDirectoryRecursively(dir_.c_str());
+ }
+
+ private:
+ Filesystem filesystem_;
+ std::string dir_;
+};
+
+void BM_FlushBenchmark(benchmark::State& state) {
+ PersistType::Code persist_type =
+ (state.range(0)) ? PersistType::LITE : PersistType::FULL;
+ int num_documents_per_persist = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark/flush";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ RandomSchemaGenerator<std::default_random_engine,
+ ExactStringPropertyGenerator>
+ schema_generator(&random, &property_generator);
+ SchemaProto schema =
+ schema_generator.GenerateSchema(num_types, kMaxNumProperties);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
+ EvenDistributionNamespaceSelector namespace_selector(namespaces);
+
+ std::vector<std::string> language = CreateLanguage(kLanguageSize, &random);
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>
+ token_generator(language, &random);
+
+ DocumentGenerator<
+ EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
+ generator(&namespace_selector, &type_selector, &token_generator,
+ kAvgDocumentSize * kContentSizePct);
+
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
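+  // Index kNumDocuments documents per iteration, persisting after every
+  // num_documents_per_persist Puts with either a LITE or FULL flush.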
+ for (auto s : state) {
+ for (int i = 0; i < kNumDocuments; ++i) {
+ icing->Put(generator.generateDoc());
+
+ if (i % num_documents_per_persist == num_documents_per_persist - 1) {
+ icing->PersistToDisk(persist_type);
+ }
+ }
+ }
+}
+BENCHMARK(BM_FlushBenchmark)
+    // First argument: whether to persist with PersistType::LITE (vs. FULL).
+    // Second argument: num_documents_per_persist.
+ ->ArgPair(true, 1)
+ ->ArgPair(false, 1)
+ ->ArgPair(true, 32)
+ ->ArgPair(false, 32)
+ ->ArgPair(true, 1024)
+ ->ArgPair(false, 1024);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc
index d31f836..2cf19ad 100644
--- a/icing/icing-search-engine_fuzz_test.cc
+++ b/icing/icing-search-engine_fuzz_test.cc
@@ -18,11 +18,15 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/document-builder.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/icing-search-engine.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -36,20 +40,6 @@ IcingSearchEngineOptions Setup() {
return icing_options;
}
-SchemaProto SetTypes() {
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- return schema;
-}
-
DocumentProto MakeDocument(const uint8_t* data, size_t size) {
 // TODO (sidchhabra): Add more optimized fuzzing techniques.
DocumentProto document;
@@ -82,7 +72,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// TODO (b/145758378): Deleting directory should not be required.
filesystem_.DeleteDirectoryRecursively(icing_options.base_dir().c_str());
icing.Initialize();
- SchemaProto schema_proto = SetTypes();
+
+ SchemaProto schema_proto =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
icing.SetSchema(schema_proto);
// Index
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
new file mode 100644
index 0000000..122e4af
--- /dev/null
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -0,0 +1,6030 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/file/version-util.h"
+#include "icing/icing-search-engine.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/index/index-processor.h"
+#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/term-indexing-handler.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-associated-score-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoDefault;
+using ::testing::EndsWith;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Matcher;
+using ::testing::Ne;
+using ::testing::Return;
+using ::testing::SizeIs;
+
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
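+// Test helpers that read and write the document log's on-disk header
+// directly, so tests can inspect or modify it.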
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test fixture covers all tests relating to
+// IcingSearchEngine::Initialize.
+class IcingSearchEngineInitializationTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+ }
+
+ void TearDown() override {
+ normalizer_.reset();
+ lang_segmenter_.reset();
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; }
+
+std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
+
+std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
+
+std::string GetIntegerIndexDir() {
+ return GetTestBaseDir() + "/integer_index_dir";
+}
+
+std::string GetQualifiedIdJoinIndexDir() {
+ return GetTestBaseDir() + "/qualified_id_join_index_dir";
+}
+
+std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
+
+std::string GetHeaderFilename() {
+ return GetTestBaseDir() + "/icing_search_engine_header";
+}
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+DocumentProto CreateEmailDocument(const std::string& name_space,
+ const std::string& uri, int score,
+ const std::string& subject_content,
+ const std::string& body_content) {
+ return DocumentBuilder()
+ .SetKey(name_space, uri)
+ .SetSchema("Email")
+ .SetScore(score)
+ .AddStringProperty("subject", subject_content)
+ .AddStringProperty("body", body_content)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
+
+TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ SchemaProto email_schema = CreateMessageSchema();
+ EXPECT_THAT(icing.SetSchema(email_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ DocumentProto doc = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing.Put(doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(doc.namespace_(), doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
+ .status()
+ .code(),
+ Eq(StatusProto::FAILED_PRECONDITION));
+
+ SearchSpecProto search_spec = SearchSpecProto::default_instance();
+ ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ constexpr int kSomePageToken = 12;
+ EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
+
+ EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Optimize().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializingAgainSavesNonPersistedData) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ MaxIndexMergeSizeReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(std::numeric_limits<int32_t>::max());
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeMergeSizeReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ZeroMergeSizeReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // One is fine, if a bit weird. It just means that the lite index will be
+ // smaller and will request a merge any time content is added to it.
+ options.set_index_merge_size(1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+}
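+
+// Taken together, the tests above bound index_merge_size: zero and negative
+// values are rejected, as is INT32_MAX, while small positive values work.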
+
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeMaxTokenLenReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_max_token_length(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ZeroMaxTokenLenReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_max_token_length(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ NegativeCompressionLevelReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(-1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ GreaterThanMaxCompressionLevelReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(10);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(0);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+}
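+
+// The accepted compression levels appear to follow zlib's 0-9 convention:
+// -1 and 10 are rejected above, while 0, 3, and 9 initialize fine (see the
+// reinitialization test below).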
+
+TEST_F(IcingSearchEngineInitializationTest,
+ ReinitializingWithDifferentCompressionLevelReturnsOk) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_compression_level(3);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ }
+ options.set_compression_level(9);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ }
+ options.set_compression_level(0);
+ {
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails DocumentStore::Create()
+ ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_))
+ .WillByDefault(Return(false));
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status().message(),
+ HasSubstr("Could not create directory"));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitMarkerFilePreviousFailuresAtThreshold) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ email1.set_creation_timestamp_ms(10000);
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+ email2.set_creation_timestamp_ms(10000);
+
+ {
+ // Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ // Write an init marker file with 5 previously failed attempts.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+
+ {
+ ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
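+    // The marker file stores the failure count as a 32-bit integer in
+    // network byte order (GHostToNetworkL is icing's htonl-style helper).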
+ int network_init_attempts = GHostToNetworkL(5);
+ // Write the updated number of attempts before we get started.
+ ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)));
+ ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
+ }
+
+ {
+ // Create the index again and verify that initialization succeeds and no
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(5));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(email1));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(email2));
+ }
+
+ // The successful init should have thrown out the marker file.
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
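+
+// Note: together with InitMarkerFilePreviousFailuresBeyondThreshold below,
+// the test above pins the retry threshold: five previous init failures keep
+// the data, while six trigger recovery with WARNING_DATA_LOSS.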
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitMarkerFilePreviousFailuresBeyondThreshold) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+
+ {
+ // Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ // Write an init marker file with 6 previously failed attempts.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+
+ {
+ ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
+ int network_init_attempts = GHostToNetworkL(6);
+ // Write the updated number of attempts before we get started.
+ ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)));
+ ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
+ }
+
+ {
+ // Create the index again and verify that initialization succeeds and all
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(6));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+
+ // The successful init should have thrown out the marker file.
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ SuccessiveInitFailuresIncrementsInitMarker) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+
+ {
+ // 1. Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ {
+ // 2. Create an index that will encounter an IO failure when trying to
+ // create the document log.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ std::string document_log_filepath =
+ icing_options.base_dir() + "/document_dir/document_log_v1";
+ ON_CALL(*mock_filesystem,
+ GetFileSize(Matcher<const char*>(Eq(document_log_filepath))))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ // Fail to initialize six times in a row.
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(1));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(2));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(3));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(4));
+
+ init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(5));
+ }
+
+ {
+ // 3. Create the index again and verify that initialization succeeds and all
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(6));
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+
+ // The successful init should have thrown out the marker file.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
+
+ // We should be able to recover from this and access all our previous data
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Checks that DocumentLog is still ok
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Checks that the term index is still ok so we can search over it
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Checks that the integer index is still ok so we can search over it
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+
+  // Checks that the schema is still ok since it'll be needed to validate the
+  // document
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ const std::string schema_file =
+ absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
+ corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ UnableToRecoverFromCorruptDocumentLog) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
+ corrupt_data.data(), corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromInconsistentSchemaStore) {
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2_with_additional_property =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("additional", "content")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ {
+ // Initializes folder and schema
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ // Add non-indexable property "additional"
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("additional")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
+ ProtoIsOk());
+
+ // Won't get us anything because "additional" isn't marked as an indexed
+ // property in the schema
+ SearchSpecProto search_spec;
+ search_spec.set_query("additional:content");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ // This schema will change the SchemaTypeIds from the previous schema_
+ // (since SchemaTypeIds are assigned based on order of the types, and this
+ // new schema changes the ordering of previous types)
+ SchemaProto new_schema;
+ auto type = new_schema.add_types();
+ type->set_schema_type("Email");
+
+    // Switching a non-indexable property to indexable changes the SectionIds
+    // (since SectionIds are assigned based on alphabetical order of indexed
+    // sections, marking "additional" as an indexed property will push the
+    // "body" and "indexableInteger" properties to different SectionIds)
+ *new_schema.add_types() =
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("additional")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ } // Will persist new schema
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // We can insert an Email document since we kept the new schema
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document;
+
+ EXPECT_THAT(icing.Get("namespace", "email_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+
+ // The section restrict will ensure we are using the correct, updated
+ // SectionId in the Index
+ search_spec1.set_query("additional:content");
+
+ // Schema type filter will ensure we're using the correct, updated
+ // SchemaTypeId in the DocumentStore
+ search_spec1.add_schema_type_filters("Message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto1;
+ expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
+ document2_with_additional_property;
+
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto1));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+  search_spec2.add_schema_type_filters("Message");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+  SearchResultProto expected_search_result_proto2;
+  expected_search_result_proto2.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
+      document2_with_additional_property;
+  *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
+      document1;
+
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto2));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromInconsistentDocumentStore) {
+ // Test the following scenario: document store is ahead of term, integer and
+ // qualified id join index. IcingSearchEngine should be able to recover all
+ // indices. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ // - Still, we need to replay and reindex documents.
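+  // (Illustrative assumption: this kind of drift is detected by comparing
+  // each index's last-added DocumentId against the document store's; only
+  // documents past that watermark need to be replayed.)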
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Puts message2 into DocumentStore but doesn't index it.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/
+ icing_options.document_store_namespace_id_fingerprint(),
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_EXPECT_OK(document_store->Put(message2));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+  // Index restoration should be triggered here and message2 should be
+  // indexed.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = message1;
+
+ // DocumentStore kept the additional document
+ EXPECT_THAT(icing.Get("namespace", "message/1",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = message2;
+ EXPECT_THAT(icing.Get("namespace", "message/2",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message1;
+
+ // We indexed the additional document in all indices.
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
+
+ SearchResultProto search_result_proto3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptedDocumentStore) {
+ // Test the following scenario: some document store derived files are
+ // corrupted. IcingSearchEngine should be able to recover the document store,
+ // and since NamespaceIds were reassigned, we should rebuild qualified id join
+ // index as well. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Should discard the entire qualified id join index directory and start
+ // it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ // - Still, we need to replay and reindex documents (for qualified id join
+ // index).
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto personDummy =
+ DocumentBuilder()
+ .SetKey("namespace2", "personDummy")
+ .SetSchema("Person")
+ .AddStringProperty("name", "personDummy")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace2", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace2#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // "namespace2" (in personDummy) will be assigned NamespaceId = 0.
+ EXPECT_THAT(icing.Put(personDummy).status(), ProtoIsOk());
+ // "namespace1" (in person1) will be assigned NamespaceId = 1.
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+ // Now delete personDummy.
+ EXPECT_THAT(
+ icing.Delete(personDummy.namespace_(), personDummy.uri()).status(),
+ ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Manually corrupt one of the derived files of DocumentStore without
+ // updating checksum in DocumentStore header.
+ std::string score_cache_filename = GetDocumentDir() + "/score_cache";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>>
+ score_cache,
+ FileBackedVector<DocumentAssociatedScoreData>::Create(
+ *filesystem(), std::move(score_cache_filename),
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(const DocumentAssociatedScoreData* score_data,
+ score_cache->Get(/*idx=*/0));
+ ICING_ASSERT_OK(score_cache->Set(
+ /*idx=*/0,
+ DocumentAssociatedScoreData(score_data->corpus_id(),
+ score_data->document_score() + 1,
+ score_data->creation_timestamp_ms(),
+ score_data->length_in_tokens())));
+ ICING_ASSERT_OK(score_cache->PersistToDisk());
+ }
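+
+  // On the next initialization, the checksum recorded in the DocumentStore
+  // header no longer matches the tampered score cache, which surfaces below
+  // as document_store_recovery_cause = IO_ERROR.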
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ // DocumentStore should be recovered. When reassigning NamespaceId, the order
+ // will be the document traversal order: [person1, person2, message].
+ // Therefore, "namespace1" will have id = 0 and "namespace2" will have id = 1.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ // Term, integer index should be unaffected.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Qualified id join index should be rebuilt.
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
+
+  // Verify join search: join a query for `name:person` with a child query for
+  // `body:message` based on the child's `senderQualifiedId` field. message
+  // should be joined to person2 correctly.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
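+  // With COUNT aggregation scoring, person2 (one joined child) should rank
+  // above person1 (no children), matching the expected ordering below.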
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+
+ *expected_join_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
+ // Test the following scenario: term index is corrupted (e.g. checksum doesn't
+ // match). IcingSearchEngine should be able to recover term index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Should discard the entire term index directory and start it from
+ // scratch.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect since we start it
+ // from scratch.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("body:message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt term index
+ {
+ const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
+ ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
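+
+  // Overwriting the first bytes of the lite index hit buffer presumably
+  // invalidates its stored checksum, so the next initialization detects the
+  // corruption and rebuilds the term index (index_restoration_cause =
+  // IO_ERROR below).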
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should be discarded once.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(1);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
+ // Test the following scenario: integer index is corrupted (e.g. checksum
+ // doesn't match). IcingSearchEngine should be able to recover integer index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Should discard the entire integer index directory and start it from
+ // scratch.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded, since we start it from scratch.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ {
+ // Initializes folder and schema, index one document
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt integer index
+ {
+ const std::string integer_index_metadata_file =
+ GetIntegerIndexDir() + "/integer_index.m";
+ ScopedFd fd(
+ filesystem()->OpenForWrite(integer_index_metadata_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should be discarded once, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded) since we start it from scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromIntegerIndexBucketSplitThresholdChange) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a message document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again with different
+ // integer_index_bucket_split_threshold. This should trigger index
+ // restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should be discarded once, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded) since we start it from
+ // scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
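+    // A different bucket split threshold changes the integer index's on-disk
+    // layout, so initialization is expected to discard and rebuild the index
+    // (reported as an IO_ERROR restoration cause below).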
+ static constexpr int32_t kNewIntegerIndexBucketSplitThreshold = 1000;
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ ASSERT_THAT(kNewIntegerIndexBucketSplitThreshold,
+ Ne(options.integer_index_bucket_split_threshold()));
+ options.set_integer_index_bucket_split_threshold(
+ kNewIntegerIndexBucketSplitThreshold);
+
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromCorruptQualifiedIdJoinIndex) {
+ // Test the following scenario: qualified id join index is corrupted (e.g.
+ // checksum doesn't match). IcingSearchEngine should be able to recover
+ // qualified id join index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Should discard the entire qualified id join index directory and start
+ // it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded, since we start
+ // it from scratch.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
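+  // Return all joined children per parent so the expected result below can
+  // include every joined message.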
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+
+ {
+    // Initializes folder and schema; indexes the documents
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Manually corrupt qualified id join index
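+  // As in the corrupt-integer-index case, clobbering the metadata file
+  // breaks its checksum, so the next initialization should detect the
+  // corruption and rebuild the qualified id join index (surfacing as
+  // IO_ERROR below).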
+ {
+ const std::string qualified_id_join_index_metadata_file =
+ GetQualifiedIdJoinIndexDir() + "/metadata";
+ ScopedFd fd(filesystem()->OpenForWrite(
+ qualified_id_join_index_metadata_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
+ // should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should be discarded once, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+
+ // Check that our index is ok by searching over the restored index
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
+  // Test the following scenario: losing the entire term index. Since we need
+  // the flash index magic to determine the version, this test throws out the
+  // entire term index and re-initializes an empty one, to bypass the
+  // undetermined-version state change and correctly trigger the "lose term
+  // index" scenario.
+ // IcingSearchEngine should be able to recover term index. Several additional
+ // behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should not be discarded (but instead just being
+ // rebuilt by replaying all docs).
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect since it is empty.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+  // 2. Delete the term index data and re-initialize an empty term index to
+  // trigger RestoreIndexIfNeeded.
+ {
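+    // Deleting the "/idx" subdirectory throws away all term index data while
+    // keeping the directory itself. Re-creating the Index then writes valid,
+    // empty headers, so initialization should treat the term index as
+    // inconsistent with the ground truth and replay all documents.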
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(),
+ /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded since we've already
+ // lost it.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
+ // Test the following scenario: losing the entire integer index directory.
+ // IcingSearchEngine should be able to recover integer index. Several
+ // additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should not be discarded since we've already
+ // lost it. Start it from scratch.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded, since we start it from scratch.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+  // 2. Delete the integer index directory to trigger RestoreIndexIfNeeded.
+ std::string integer_index_dir = GetIntegerIndexDir();
+ filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded since we've
+ // already lost it, and Clear() should never be called (i.e. storage sub
+ // directory "*/integer_index_dir/*" should never be discarded) since we
+ // start it from scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexLoseQualifiedIdJoinIndex) {
+ // Test the following scenario: losing the entire qualified id join index
+ // directory. IcingSearchEngine should be able to recover qualified id join
+ // index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should not be discarded since we've
+ // already lost it. Start it from scratch.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded, since we start
+ // it from scratch.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+  // 2. Delete the qualified id join index directory to trigger
+  // RestoreIndexIfNeeded.
+ std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
+ filesystem()->DeleteDirectoryRecursively(qualified_id_join_index_dir.c_str());
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+    // Ensure integer index directory should never be discarded, and Clear()
+    // should never be called (i.e. storage sub directory
+    // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+    // Ensure qualified id join index directory should never be discarded
+    // since we've already lost it, and Clear() should never be called (i.e.
+    // storage sub directory "*/qualified_id_join_index_dir/*" should never
+    // be discarded) since we start it from scratch.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateLiteIndexWithoutReindexing) {
+ // Test the following scenario: term lite index is *completely* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite index. This should be sufficient to make term index
+ // consistent with document store, so reindexing should not take place.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a LiteIndex that will only allow a person and a
+ // message document before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(person.ByteSizeLong() +
+ message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ // Add two message documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+  // 2. Manually add some data into the term lite index and increment
+  // last_added_document_id, but don't merge into the main index. This leaves
+  // the term index's last_added_document_id ahead of the document store's:
+ // - Document store: [0, 1, 2]
+ // - Term index
+ // - Main index: [0, 1, 2]
+ // - Lite index: [3]
+ // - Integer index: [0, 1, 2]
+ // - Qualified id join index: [0, 1, 2]
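+  // The Edit() call below buffers a hit for document id 3, which was never
+  // added to the document store, leaving the lite index strictly ahead of
+  // the ground truth.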
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+    // Ensure term index directory should never be discarded, since we only
+    // call TruncateTo() for the term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+    // Since truncating the lite index is sufficient to make the term index
+    // consistent with the document store, replaying documents or reindexing
+    // shouldn't take place.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // Only the documents that were in the main index should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(2));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(2));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/1"));
+ }
+
+  // 4. Since document 3 doesn't exist, querying for "foo" through the engine
+  // is not enough to verify the correctness of term index restoration.
+  // Instead, we have to check that no hits for "foo" remain in the term
+  // index.
+ {
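+    // Advance() on an iterator with no matching hits is expected to return
+    // RESOURCE_EXHAUSTED, confirming that the truncated "foo" hit was not
+    // reintroduced during restoration.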
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateLiteIndexWithReindexing) {
+ // Test the following scenario: term lite index is *partially* ahead of
+ // document store. IcingSearchEngine should be able to recover term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite index. However, some valid data in term lite index were
+ // discarded together, so reindexing should still take place to recover
+ // them after truncating.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a LiteIndex that will only allow a person and a
+ // message document before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ // Add two message documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+    // Add one more document. This one should remain in the lite index.
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+  // 2. Manually add some data into the term lite index and increment
+  // last_added_document_id, but don't merge into the main index. This leaves
+  // the term index's last_added_document_id ahead of the document store's:
+ // - Document store: [0, 1, 2, 3]
+ // - Term index
+ // - Main index: [0, 1, 2]
+ // - Lite index: [3, 4]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3]
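+  // The Edit() call below buffers a hit for document id 4, which was never
+  // added to the document store. Unlike the previous test, document id 3's
+  // hits live only in the lite index, so truncating the lite index also
+  // destroys valid data that reindexing must replay.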
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+    // Ensure term index directory should never be discarded, since we only
+    // call TruncateTo() for the term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should never be discarded, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(message.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+    // Truncating the lite index not only deletes data that is ahead of the
+    // document store, but also deletes valid data. Therefore, we still have
+    // to replay documents and reindex.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+    // After reindexing, all documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
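+ // Note: with COUNT aggregation, each parent result is scored by the
+ // number of joined child results it has.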
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
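+ // Use the maximum value so that no joined children are truncated from the
+ // results.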
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+
+ // 4. Since document 4 doesn't exist, testing the query "foo" is not enough
+ // to verify the correctness of term index restoration. Instead, we have to
+ // check that hits for "foo" are not found in the term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
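+ // An iterator with no hits at all fails Advance() with RESOURCE_EXHAUSTED,
+ // which confirms the hit for "foo" was purged by the truncation.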
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateMainIndexWithoutReindexing) {
+ // Test the following scenario: the term main index is *completely* ahead
+ // of the document store. IcingSearchEngine should be able to recover the
+ // term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite and main index. This should be sufficient to make term
+ // index consistent with document store (in this case, document store is
+ // empty as well), so reindexing should not take place.
+ // - "Clear()" should be called for integer index. It is a special case when
+ // document store has no document. Since there is no integer index storage
+ // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index. It is a special
+ // case when document store has no document.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into the term lite index and increment
+ // last_added_document_id. Merge some of them into the main index and keep
+ // others in the lite index. This will cause mismatched document ids with
+ // the document store.
+ // - Document store: []
+ // - Term index
+ // - Main index: [0]
+ // - Lite index: [1]
+ // - Integer index: []
+ // - Qualified id join index: []
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ // index merge size is not important here because we will manually
+ // invoke merge below.
+ Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ // Add hits for document 0 and merge.
+ ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId);
+ index->set_last_added_document_id(0);
+ Index::Editor editor =
+ index->Edit(/*document_id=*/0, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ ICING_ASSERT_OK(index->Merge());
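+ // Merge() moves the buffered lite index hits into the main index, so the
+ // hit for document 0 now lives only in the main index.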
+
+ // Add hits for document 1 and don't merge.
+ index->set_last_added_document_id(1);
+ editor = index->Edit(/*document_id=*/1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure the term index directory is never discarded, since we only call
+ // TruncateTo() for the term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure the integer index directory is never discarded. Even though
+ // Clear() was called, it shouldn't take effect since there is no storage
+ // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure the qualified id join index directory itself is never discarded.
+ // Clear() was called and should discard and reinitialize the underlying
+ // mapper.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Since truncating main index is sufficient to make term index consistent
+ // with document store, replaying documents or reindexing shouldn't take
+ // place.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+ // 4. Since documents 0 and 1 don't exist, testing the queries "foo" and
+ // "bar" is not enough to verify the correctness of term index restoration.
+ // Instead, we have to check that hits for "foo" and "bar" are not found in
+ // the term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_iter,
+ index->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateMainIndexWithReindexing) {
+ // Test the following scenario: the term main index is *partially* ahead
+ // of the document store. IcingSearchEngine should be able to recover the
+ // term index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - In RestoreIndexIfNecessary():
+ // - "TruncateTo()" for term index should take effect and throw out the
+ // entire lite and main index. However, some valid data in term main index
+ // were discarded together, so reindexing should still take place to
+ // recover them after truncating.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into the term lite index and increment
+ // last_added_document_id. Merge some of them into the main index and keep
+ // others in the lite index. This will cause mismatched document ids with
+ // the document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index
+ // - Main index: [0, 1, 2, 3, 4]
+ // - Lite index: [5]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3]
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(
+ Index::Options(GetIndexDir(),
+ /*index_merge_size=*/message.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/8),
+ filesystem(), icing_filesystem()));
+ // Add hits for document 4 and merge.
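+ // After step 1, last_added_document_id() is 3 (the person and the three
+ // messages take document ids 0-3), so document 4 is the next id.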
+ DocumentId original_last_added_doc_id = index->last_added_document_id();
+ index->set_last_added_document_id(original_last_added_doc_id + 1);
+ Index::Editor editor =
+ index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ ICING_ASSERT_OK(index->Merge());
+
+ // Add hits for document 5 and don't merge.
+ index->set_last_added_document_id(original_last_added_doc_id + 2);
+ editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index
+ // and trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure the term index directory is never discarded, since we only call
+ // TruncateTo() for the term index.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure the integer index directory is never discarded, and that Clear()
+ // is never called (i.e. the storage sub directory "*/integer_index_dir/*"
+ // is never discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure the qualified id join index directory is never discarded, and
+ // that Clear() is never called (i.e. the storage sub directory
+ // "*/qualified_id_join_index_dir/*" is never discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ // Truncating the main index not only deletes data that is ahead of the
+ // document store, but also deletes valid data. Therefore, we still have to
+ // replay documents and reindex.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+
+ // 4. Since documents 4 and 5 don't exist, testing the queries "foo" and
+ // "bar" is not enough to verify the correctness of term index restoration.
+ // Instead, we have to check that hits for "foo" and "bar" are not found in
+ // the term index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_iter,
+ index->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(doc_hit_info_iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateIntegerIndexWithoutReindexing) {
+ // Test the following scenario: the integer index is *completely* ahead of
+ // the document store. IcingSearchEngine should be able to recover the
+ // integer index.
+ // Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index and throw out all integer
+ // index storages, i.e. all storage sub directories (path_expr =
+ // "*/integer_index_dir/*") should be discarded. This should be sufficient
+ // to make integer index consistent with document store (in this case,
+ // document store is empty as well), so reindexing should not take place.
+ // - "Clear()" should be called for qualified id join index. It is a special
+ // case when document store has no document.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into the integer index and increment
+ // last_added_document_id. This will cause mismatched document ids with
+ // the document store.
+ // - Document store: []
+ // - Term index: []
+ // - Integer index: [0]
+ // - Qualified id join index: []
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+ // Add hits for document 0.
+ ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId);
+ integer_index->set_last_added_document_id(0);
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
+ /*property_path=*/"indexableInteger", /*document_id=*/0,
+ /*section_id=*/0);
+ ICING_ASSERT_OK(editor->BufferKey(123));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
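+ // The integer index now reports last_added_document_id == 0 while the
+ // document store is empty, which is exactly the "completely ahead"
+ // inconsistency this test exercises.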
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure the integer index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate integer index and thus storage sub
+ // directory (path_expr = "*/integer_index_dir/*") should be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(1);
+ // Ensure the qualified id join index directory itself is never discarded.
+ // Clear() was called and should discard and reinitialize the underlying
+ // mapper.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Since truncating integer index is sufficient to make it consistent with
+ // document store, replaying documents or reindexing shouldn't take place.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify that Clear() safely wiped out the pre-existing hit for
+ // 'indexableInteger' == 123. Add a new document without that value for
+ // 'indexableInteger' that will take docid=0. If the integer index was not
+ // cleared correctly, then it will still have the previously added hit for
+ // 'indexableInteger' == 123 for docid 0 and incorrectly return this new
+ // doc in a query.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateIntegerIndexWithReindexing) {
+ // Test the following scenario: the integer index is *partially* ahead of
+ // the document store. IcingSearchEngine should be able to recover the
+ // integer index. Several additional behaviors are also tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index and throw out all integer
+ // index storages, i.e. all storage sub directories (path_expr =
+ // "*/integer_index_dir/*") should be discarded. However, some valid data
+ // in integer index were discarded together, so reindexing should still
+ // take place to recover them after clearing.
+ // - "Clear()" shouldn't be called for qualified id join index, i.e. no
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into the integer index and increment
+ // last_added_document_id. This will cause mismatched document ids with
+ // the document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index: [0, 1, 2, 3]
+ // - Integer index: [0, 1, 2, 3, 4]
+ // - Qualified id join index: [0, 1, 2, 3]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+ // Add hits for document 4.
+ DocumentId original_last_added_doc_id =
+ integer_index->last_added_document_id();
+ integer_index->set_last_added_document_id(original_last_added_doc_id + 1);
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
+ /*property_path=*/"indexableInteger",
+ /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0);
+ ICING_ASSERT_OK(editor->BufferKey(456));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure the integer index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate integer index and thus storage sub
+ // directory (path_expr = "*/integer_index_dir/*") should be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(1);
+ // Ensure the qualified id join index directory is never discarded, and
+ // that Clear() is never called (i.e. the storage sub directory
+ // "*/qualified_id_join_index_dir/*" is never discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+
+ // Verify that the rebuild safely wiped out the pre-existing hit for
+ // 'indexableInteger' == 456. Add a new document without a value for
+ // 'indexableInteger' that will take docid=4. If the integer index was not
+ // rebuilt correctly, then it will still have the previously added hit for
+ // 'indexableInteger' == 456 for docid 4 and incorrectly return this new
+ // doc in a query.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 456");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) {
+ // Test the following scenario: the qualified id join index is *completely*
+ // ahead of the document store. IcingSearchEngine should be able to recover
+ // the qualified id join index. Several additional behaviors are also
+ // tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" should be called for integer index. It is a special case when
+ // document store has no document. Since there is no integer index storage
+ // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index and throw out
+ // all data, i.e. discarding the underlying mapper (path_expr =
+ // "*/qualified_id_join_index_dir/*") and reinitialize. This should be
+ // sufficient to make qualified id join index consistent with document
+ // store (in this case, document store is empty as well), so reindexing
+ // should not take place.
+
+ // 1. Create an index with no document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into the qualified id join index and increment
+ // last_added_document_id. This will cause mismatched document ids with
+ // the document store.
+ // - Document store: []
+ // - Term index: []
+ // - Integer index: []
+ // - Qualified id join index: [0]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
+ // Add data for document 0.
+ ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
+ kInvalidDocumentId);
+ qualified_id_join_index->set_last_added_document_id(0);
+ ICING_ASSERT_OK(qualified_id_join_index->Put(
+ /*schema_type_id=*/0, /*joinable_property_id=*/0, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*target_str=*/"uri")}));
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure the integer index directory is never discarded. Even though
+ // Clear() was called, it shouldn't take effect since there is no storage
+ // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure the qualified id join index directory itself is never discarded.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate qualified id join index and thus
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ // Since truncating qualified id join index is sufficient to make it
+ // consistent with document store, replaying documents or reindexing
+ // shouldn't take place.
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+
+ // 4. Since document 0 doesn't exist, testing a join query is not enough to
+ // verify the correctness of qualified id join index restoration. Instead,
+ // we have to check that the previously added data is not found in the
+ // qualified id join index.
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto iterator, qualified_id_join_index->GetIterator(
+ /*schema_type_id=*/0, /*joinable_property_id=*/0));
+ EXPECT_THAT(iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) {
+ // Test the following scenario: the qualified id join index is *partially*
+ // ahead of the document store. IcingSearchEngine should be able to recover
+ // the qualified id join index. Several additional behaviors are also
+ // tested:
+ // - Index directory handling:
+ // - Term index directory should be unaffected.
+ // - Integer index directory should be unaffected.
+ // - Qualified id join index directory should be unaffected.
+ // - Truncate indices:
+ // - "TruncateTo()" for term index shouldn't take effect.
+ // - "Clear()" shouldn't be called for integer index, i.e. no integer index
+ // storage sub directories (path_expr = "*/integer_index_dir/*") should be
+ // discarded.
+ // - "Clear()" should be called for qualified id join index and throw out
+ // all data, i.e. discarding the underlying mapper (path_expr =
+ // "*/qualified_id_join_index_dir/*") and reinitialize. However, some
+ // valid data in qualified id join index were discarded together, so
+ // reindexing should still take place to recover them after clearing.
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with 3 message documents.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Manually add some data into the qualified id join index and increment
+ // last_added_document_id. This will cause mismatched document ids with
+ // the document store.
+ // - Document store: [0, 1, 2, 3]
+ // - Term index: [0, 1, 2, 3]
+ // - Integer index: [0, 1, 2, 3]
+ // - Qualified id join index: [0, 1, 2, 3, 4]
+ {
+ Filesystem filesystem;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem,
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
+ // Add data for document 4.
+ DocumentId original_last_added_doc_id =
+ qualified_id_join_index->last_added_document_id();
+ qualified_id_join_index->set_last_added_document_id(
+ original_last_added_doc_id + 1);
+ ICING_ASSERT_OK(qualified_id_join_index->Put(
+ /*schema_type_id=*/1, /*joinable_property_id=*/0,
+ /*document_id=*/original_last_added_doc_id + 1,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*target_str=*/"person")}));
+ }
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure the integer index directory is never discarded, and that Clear()
+ // is never called (i.e. the storage sub directory "*/integer_index_dir/*"
+ // is never discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure the qualified id join index directory itself is never discarded.
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ // Clear() should be called to truncate qualified id join index and thus
+ // underlying storage sub directory (path_expr =
+ // "*/qualified_id_join_index_dir/*") should be discarded.
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(AtLeast(1));
+
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify term index works normally
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:consectetur");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results1 =
+ icing.Search(search_spec1, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results1.status(), ProtoIsOk());
+ EXPECT_THAT(results1.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results1.results(), SizeIs(3));
+ EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results2.results(), SizeIs(3));
+ EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
+ EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
+ EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+
+ // Add document 4 without "senderQualifiedId". If the join index is not
+ // rebuilt correctly, then it will still have the previously added
+ // senderQualifiedId for document 4 and incorrectly include document 4 on
+ // the child side of the join.
+ DocumentProto another_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ ASSERT_THAT(results3.results(), SizeIs(1));
+ EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) {
+ // 1. Create an index with a single document in it that has no indexed
+ // content.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Set a schema for a single type that has no indexed properties.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedField")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("unindexedInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add a document that contains no indexed properties.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("unindexedField",
+ "Don't you dare search over this!")
+ .AddInt64Property("unindexedInteger", -123)
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again. This should NOT trigger a recovery of any kind.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ init_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
+ // 1. Create an index with a single document in it that has no valid indexed
+ // tokens in its content.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // The schema above has term, integer, and join indexable properties, but
+ // the document added below contains no valid indexable content for any of
+ // them.
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add a document that contains:
+ // - No valid indexed string content - just punctuation
+ // - No integer content - since it is an optional property
+ // - No qualified id content - since it is an optional property
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "?...!")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again. This should NOT trigger a recovery of any kind.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ init_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogFunctionLatency) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
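+  // Every timer created by the fake clock should report an elapsed time of
+  // 10ms, which is what the latency assertion below expects.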
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogNumberOfDocuments) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 456)
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(0));
+
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(1));
+
+ // Put another document.
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
+ // Even though the fake timer will return 10, all the latency numbers related
+ // to recovery / restoration should be 0 during the first-time initialization.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCausePartialDataLoss) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Append a non-checksummed document. This will mess up the checksum of the
+ // proto log, forcing it to rewind and later return a DATA_LOSS error.
+ const std::string serialized_document = document.SerializeAsString();
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+
+ int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
+ filesystem()->PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(),
+ serialized_document.size());
+ }
+
+ {
+ // Document store will rewind to previous checkpoint. The cause should be
+ // DATA_LOSS and the data status should be PARTIAL_LOSS.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::PARTIAL_LOSS));
+    // Document store rewinds to the previous checkpoint and all derived files
+    // are regenerated.
+    // - The last stored doc id stays consistent with the last added document
+    //   ids in the term/integer indices, so there is no index restoration.
+    // - The qualified id join index depends on document store derived files,
+    //   and since those were regenerated, it must be rebuilt.
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseCompleteDataLoss) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ int64_t corruptible_offset;
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // There's some space at the beginning of the file (e.g. the header,
+    // kMagic, etc.) that is necessary to initialize the
+    // PortableFileBackedProtoLog. We can't corrupt that region, so we need to
+    // figure out the offset at which documents will be written, which is the
+    // file size right after initialization.
+ corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
+
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ }
+
+ {
+    // "Corrupt" the content written in the log. Make the corrupt document
+    // smaller than the original one so we don't accidentally write past the
+    // end of the file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem()->PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(*filesystem(), document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteDocumentLogHeader(*filesystem(), document_log_file, header);
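+    // With the dirty flag set and a freshly computed header checksum, the
+    // header itself still looks valid, so initialization should get past the
+    // header check and detect the corruption while verifying the log's
+    // contents, forcing the complete rewind below.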
+ }
+
+ {
+ // Document store will completely rewind. The cause should be DATA_LOSS and
+ // the data status should be COMPLETE_LOSS.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::COMPLETE_LOSS));
+ // The complete rewind of ground truth causes us to clear the index, but
+ // that's not considered a restoration.
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+    // Delete the index's storage and then re-initialize an empty index to
+    // trigger RestoreIndexIfNeeded.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(),
+ /*index_merge_size=*/100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/50),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK(index->PersistToDisk());
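+    // Persisting an empty index leaves its last-added document id behind the
+    // document store's last-stored document id, which initialization should
+    // detect as an inconsistency with the ground truth.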
+ }
+
+ {
+ // Index is empty but ground truth is not. Index should be restored due to
+ // the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(
+ IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the integer index file to trigger RestoreIndexIfNeeded.
+ std::string integer_index_dir = GetIntegerIndexDir();
+ filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
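+    // With its directory gone, an empty integer index should be recreated on
+    // the next initialization, and its missing last-added document id should
+    // force a restoration.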
+ }
+
+ {
+    // Integer index is empty but ground truth is not. The integer index
+    // should be restored due to the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(
+ IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initialize and put documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
+ std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
+ filesystem()->DeleteDirectoryRecursively(
+ qualified_id_join_index_dir.c_str());
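+    // As with the other indices, an empty qualified id join index should be
+    // recreated on the next initialization and then restored from the ground
+    // truth.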
+ }
+
+ {
+    // Qualified id join index is empty but ground truth is not. It should be
+    // restored due to the inconsistency.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Simulate a schema change where power is lost after the schema is written.
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
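+    // A marker file that is still present at the next initialization should
+    // signal that a SetSchema was interrupted midway, i.e. that schema
+    // changes may be out of sync with the document store and indices.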
+
+ // Write the new schema
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ {
+    // Both document store and index should be recovered because the leftover
+    // set-schema marker indicates that schema changes are out of sync.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+
+ {
+ // No recovery should be needed.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIndexIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string lite_index_buffer_file_path =
+ absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(_))
+ .WillRepeatedly(DoDefault());
+ // This fails Index::Create() once.
+ EXPECT_CALL(*mock_icing_filesystem,
+ OpenForWrite(Eq(lite_index_buffer_file_path)))
+ .WillOnce(Return(-1))
+ .WillRepeatedly(DoDefault());
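+  // After the one-time failure, Icing should recover by recreating the term
+  // index and restoring it from the ground truth, logging IO_ERROR as the
+  // restoration cause.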
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseIntegerIndexIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string integer_index_metadata_file =
+ absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, OpenForWrite(_)).WillRepeatedly(DoDefault());
+ // This fails IntegerIndex::Create() once.
+ EXPECT_CALL(*mock_filesystem, OpenForWrite(Eq(integer_index_metadata_file)))
+ .WillOnce(Return(-1))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initialize and put documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ std::string qualified_id_join_index_metadata_file =
+ absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _))
+ .WillRepeatedly(DoDefault());
+ // This fails QualifiedIdJoinIndexImplV2::Create() once.
+ EXPECT_CALL(
+ *mock_filesystem,
+ PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _,
+ _, _))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseDocStoreIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .Build();
+ {
+ // Initialize and put one document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ std::string document_store_header_file_path =
+ absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
+ .WillRepeatedly(DoDefault());
+ // This fails DocumentStore::InitializeDerivedFiles() once.
+ EXPECT_CALL(
+ *mock_filesystem,
+ Read(Matcher<const char*>(Eq(document_store_header_file_path)), _, _))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the schema store type mapper to trigger an I/O error.
+    std::string schema_type_mapper_dir =
+        GetSchemaDir() + "/schema_type_mapper";
+    ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
+        schema_type_mapper_dir.c_str()));
+ }
+
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
+ InitializeShouldLogNumberOfSchemaTypes) {
+ {
+ // Initialize an empty storage.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ // There should be 0 schema types.
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(0));
+
+ // Set a schema with one type config.
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ // There should be 1 schema type.
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(1));
+
+ // Create and set a schema with two type configs: Email and Message.
+ SchemaProto schema = CreateEmailSchema();
+ *schema.add_types() = CreateMessageSchemaTypeConfig();
+
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(2));
+ }
+}
+
+// TODO(b/275121148): deprecate this test after rolling out join index v2.
+class IcingSearchEngineInitializationSwitchJoinIndexTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<bool> {};
+TEST_P(IcingSearchEngineInitializationSwitchJoinIndexTest, SwitchJoinIndex) {
+ bool use_join_index_v2 = GetParam();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+  // 1. Create an index with 3 message documents.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(use_join_index_v2);
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/2").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ message = DocumentBuilder(message).SetUri("message/3").Build();
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+  // 2. Create the index again, changing the join index version. This should
+  // trigger join index restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+    // Ensure the term index directory is never discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+    // Ensure the integer index directory is never discarded and Clear() is
+    // never called (i.e. the storage subdirectory "*/integer_index_dir/*" is
+    // never discarded).
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(0);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+    // Ensure the qualified id join index directory is discarded exactly once,
+    // and Clear() is never called (i.e. the storage subdirectory
+    // "*/qualified_id_join_index_dir/*" is never discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(1);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ options.set_use_new_qualified_id_join_index(!use_join_index_v2);
+
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+
+ // Verify qualified id join index works normally: join a query for
+ // `name:person` with a child query for `body:consectetur` based on the
+ // child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:consectetur");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
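+    // Return every joined child so the assertions below can check all three
+    // messages.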
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person"));
+ EXPECT_THAT(results.results(0).joined_results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).joined_results(0).document().uri(),
+ Eq("message/3"));
+ EXPECT_THAT(results.results(0).joined_results(1).document().uri(),
+ Eq("message/2"));
+ EXPECT_THAT(results.results(0).joined_results(2).document().uri(),
+ Eq("message/1"));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(IcingSearchEngineInitializationSwitchJoinIndexTest,
+ IcingSearchEngineInitializationSwitchJoinIndexTest,
+ testing::Values(true, false));
+
+class IcingSearchEngineInitializationVersionChangeTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<version_util::VersionInfo> {};
+
+TEST_P(IcingSearchEngineInitializationVersionChangeTest,
+ RecoverFromVersionChange) {
+ // TODO(b/280697513): test backup schema migration
+  // Test the following scenario: version change. All derived data should be
+  // rebuilt. We test this by manually adding some invalid derived data and
+  // verifying that it is removed by the rebuild.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person/1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person/2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("Message")
+ .AddStringProperty("body", "correct message")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person/1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+    // Initialize the directory and schema, and index person1 and person2.
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+    // Manually:
+    // - Put message into the DocumentStore
+    // - But add some incorrect data for message into the 3 indices
+    // - Change the version file
+    //
+    // This keeps last_added_document_id consistent with
+    // last_stored_document_id, so if Icing doesn't handle the version change
+    // correctly, the index won't be rebuilt.
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Put message into DocumentStore
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/
+ icing_options.document_store_namespace_id_fingerprint(),
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));
+
+ // Index doc_id with incorrect data
+ Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, filesystem(), icing_filesystem()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
+ QualifiedIdJoinIndexImplV2::Create(*filesystem(),
+ GetQualifiedIdJoinIndexDir(),
+ /*pre_mapping_fbv=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock, integer_index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock, document_store.get(), qualified_id_join_index.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+ IndexProcessor index_processor(std::move(handlers), &fake_clock);
+
+ DocumentProto incorrect_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("Message")
+ .AddStringProperty("body", "wrong message")
+ .AddInt64Property("indexableInteger", 456)
+ .AddStringProperty("senderQualifiedId", "namespace#person/2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(),
+ std::move(incorrect_message)));
+ ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id));
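+    // At this point the ground truth holds "correct message" while all three
+    // indices hold hits for "wrong message"; only a version-triggered rebuild
+    // should reconcile them, which the queries below verify.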
+
+ // Change existing data's version file
+ const version_util::VersionInfo& existing_version_info = GetParam();
+ ICING_ASSERT_OK(version_util::WriteVersion(
+ *filesystem(), GetVersionFilename(), existing_version_info));
+ }
+
+ // Mock filesystem to observe and check the behavior of all indices.
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+  // Index restoration should be triggered here. The incorrect data should be
+  // deleted and the correct data for message should be indexed.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+ // Manually check version file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ version_util::VersionInfo version_info_after_init,
+ version_util::ReadVersion(*filesystem(), GetVersionFilename(),
+ GetIndexDir()));
+ EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion));
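+  // max_version is expected to be monotonic: it should record the highest
+  // version that has ever handled this data set, hence the std::max below.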
+ EXPECT_THAT(version_info_after_init.max_version,
+ Eq(std::max(version_util::kVersion, GetParam().max_version)));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:correct");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ // Person 1 with message
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person1;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ // Person 2 without children
+ *expected_join_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+
+ SearchResultProto search_result_proto3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineInitializationVersionChangeTest,
+ IcingSearchEngineInitializationVersionChangeTest,
+ testing::Values(
+ // Manually change existing data set's version to kVersion + 1. When
+ // initializing, it will detect "rollback".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion + 1,
+ /*max_version_in=*/version_util::kVersion + 1),
+
+      // Currently we don't have any "upgrade" that requires rebuilding
+      // derived files, so skip this case until we have one.
+
+ // Manually change existing data set's version to kVersion - 1 and
+ // max_version to kVersion. When initializing, it will detect "roll
+ // forward".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion - 1,
+ /*max_version_in=*/version_util::kVersion),
+
+ // Manually change existing data set's version to 0 and max_version to
+ // 0. When initializing, it will detect "version 0 upgrade".
+ //
+      // Note: in reality, version 0 won't be written into the version file,
+      // but it is ok here since it is a hack to simulate the version 0
+      // situation.
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/0),
+
+ // Manually change existing data set's version to 0 and max_version to
+ // kVersion. When initializing, it will detect "version 0 roll forward".
+ //
+      // Note: in reality, version 0 won't be written into the version file,
+      // but it is ok here since it is a hack to simulate the version 0
+      // situation.
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/version_util::kVersion)));
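+
+// For reference, a minimal sketch of how a version check like the one these
+// cases exercise could classify an existing data set. This mirrors the case
+// comments above and is not icing's actual version_util implementation; the
+// ChangeKind enum and ClassifyVersionChange() are hypothetical names.
+//
+//   enum class ChangeKind { kCompatible, kRollback, kRollForward, kUpgrade };
+//
+//   ChangeKind ClassifyVersionChange(const version_util::VersionInfo& info,
+//                                    int current_version) {
+//     if (info.version > current_version) return ChangeKind::kRollback;
+//     if (info.version < current_version) {
+//       return info.max_version >= current_version ? ChangeKind::kRollForward
+//                                                  : ChangeKind::kUpgrade;
+//     }
+//     return ChangeKind::kCompatible;
+//   }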
+
+class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<std::tuple<bool, bool>> {};
+TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ ChangePropertyExistenceHitsFlagTest) {
+ bool before_build_property_existence_metadata_hits = std::get<0>(GetParam());
+ bool after_build_property_existence_metadata_hits = std::get<1>(GetParam());
+ bool flag_changed = before_build_property_existence_metadata_hits !=
+ after_build_property_existence_metadata_hits;
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Value")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("score")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create a document with every property.
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+
+ // 1. Create an index with the 3 documents.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(
+ before_build_property_existence_metadata_hits);
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again with
+ // after_build_property_existence_metadata_hits.
+ //
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure that the term index is rebuilt if the flag is changed.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(flag_changed ? 1 : 0);
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(
+ after_build_property_existence_metadata_hits);
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+  // Ensure that the term index is rebuilt if the flag is changed. Discarding
+  // the index directory surfaces as an IO_ERROR restoration cause.
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(flag_changed ? InitializeStatsProto::IO_ERROR
+ : InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Get all documents that have "body".
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ search_spec.set_query("hasProperty(\"body\")");
+ SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+
+ // Get all documents that have "timestamp".
+ search_spec.set_query("hasProperty(\"timestamp\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+
+ // Get all documents that have "score".
+ search_spec.set_query("hasProperty(\"score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ if (after_build_property_existence_metadata_hits) {
+ EXPECT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
+ } else {
+ EXPECT_THAT(results.results(), IsEmpty());
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+ testing::Values(std::make_tuple(false, false), std::make_tuple(false, true),
+ std::make_tuple(true, false), std::make_tuple(true, true)));
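+
+// Note: the four tuples above spell out every (before, after) flag
+// combination by hand. An equivalent instantiation could use gtest's
+// testing::Combine, which expands the cross product automatically:
+//
+//   INSTANTIATE_TEST_SUITE_P(
+//       IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+//       IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
+//       testing::Combine(testing::Bool(), testing::Bool()));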
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
new file mode 100644
index 0000000..61b594c
--- /dev/null
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -0,0 +1,1855 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::Lt;
+using ::testing::Return;
+
+// For mocking purposes, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test suite is meant to cover all tests relating to
+// IcingSearchEngine::Optimize.
+class IcingSearchEngineOptimizeTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances by them.
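+//
+// A hypothetical shape for one such builder, to illustrate the intent of the
+// TODO; the SearchSpecBuilder class below is not part of icing:
+//
+//   class SearchSpecBuilder {
+//    public:
+//     SearchSpecBuilder& SetQuery(std::string query) {
+//       spec_.set_query(std::move(query));
+//       return *this;
+//     }
+//     SearchSpecBuilder& SetTermMatchType(TermMatchType::Code match_type) {
+//       spec_.set_term_match_type(match_type);
+//       return *this;
+//     }
+//     SearchSpecProto Build() && { return std::move(spec_); }
+//
+//    private:
+//     SearchSpecProto spec_;
+//   };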
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ AllPageTokensShouldBeInvalidatedAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ // Searches and gets the first page, 1 result
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+  // Since the token is a random number, we don't verify its exact value.
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+  // At this point, document1 has not yet been fetched.
+
+ OptimizeResultProto optimize_result_proto;
+ optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
+ optimize_result_proto.mutable_status()->set_message("");
+ OptimizeResultProto actual_result = icing.Optimize();
+ actual_result.clear_optimize_stats();
+ ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto));
+
+  // Tries to fetch the second page; there are no results since all tokens
+  // have been invalidated during Optimize().
+ expected_search_result_proto.clear_results();
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
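+
+// The test above fetches only one extra page. More generally, a caller can
+// drain all pages by looping until the returned token is
+// kInvalidNextPageToken; a sketch using the same Search()/GetNextPage() APIs:
+//
+//   SearchResultProto page =
+//       icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+//   while (true) {
+//     // ... consume page.results() ...
+//     if (page.next_page_token() == kInvalidNextPageToken) break;
+//     page = icing.GetNextPage(page.next_page_token());
+//   }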
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Deletes document1
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+ const std::string document_log_path =
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
+ int64_t document_log_size_before =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ int64_t document_log_size_after =
+ filesystem()->GetFileSize(document_log_path.c_str());
+
+ // Validates that document can't be found right after Optimize()
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ // Validates that document is actually removed from document log
+ EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldDeleteTemporaryDirectory) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+  // Create a tmp dir that will be used in Optimize() to swap files. This
+  // validates that any pre-existing tmp dirs are deleted before use.
+ const std::string tmp_dir =
+ icing_options.base_dir() + "/document_dir_optimize_tmp";
+
+ const std::string tmp_file = tmp_dir + "/file";
+ ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str()));
+ ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ fd.reset();
+
+ EXPECT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
+ EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(1000);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Just initialized, nothing is optimizable yet.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Only have active documents, nothing is optimizable yet.
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+
+ // Deletes document1
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+ int64_t first_estimated_optimizable_bytes =
+ optimize_info.estimated_optimizable_bytes();
+
+ // Add a second document, but it'll be expired since the time (1000) is
+ // greater than the document's creation timestamp (100) + the document's ttl
+ // (500)
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
+ Gt(first_estimated_optimizable_bytes));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+
+ // Optimize
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ }
+
+ {
+ // Recreate with new time
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(5000);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Nothing is optimizable now that everything has been optimized away.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000));
+ }
+}
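+
+// The expiry logic exercised above is plain arithmetic: with the fake clock
+// at 1000ms, document2 (creation 100ms, ttl 500ms) is expired because
+// 100 + 500 < 1000. A sketch of that check; IsExpired() is a hypothetical
+// helper (assuming a ttl of 0 means "never expires"), not icing's code:
+//
+//   bool IsExpired(int64_t creation_timestamp_ms, int64_t ttl_ms,
+//                  int64_t now_ms) {
+//     return ttl_ms > 0 && creation_timestamp_ms + ttl_ms < now_ms;
+//   }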
+
+TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body four")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body five")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Get() and Put() are good right after Optimize()
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ *expected_get_result_proto.mutable_document() = document4;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto empty_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto empty_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto empty_document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(std::move(schema)).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Get() and Put() are good right after Optimize()
+ *expected_get_result_proto.mutable_document() = empty_document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, DeleteShouldWorkAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Delete() works right after Optimize()
+ EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(
+ StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri2) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizationFailureUninitializesIcing) {
+  // Set up the filesystem to fail.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ bool just_swapped_files = false;
+ auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) {
+ if (just_swapped_files) {
+ // We should fail the first call immediately after swapping files.
+ just_swapped_files = false;
+ return false;
+ }
+ return filesystem()->CreateDirectoryRecursively(dir_name);
+ };
+ ON_CALL(*mock_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(create_dir_lambda);
+
+ auto swap_lambda = [&just_swapped_files](const char* first_dir,
+ const char* second_dir) {
+ just_swapped_files = true;
+ return false;
+ };
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault(swap_lambda);
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // The mocks should cause an unrecoverable error during Optimize - returning
+ // INTERNAL.
+ ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
+
+ // Ordinary operations should fail safely.
+ SchemaProto simple_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type0").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto simple_doc = DocumentBuilder()
+ .SetKey("namespace0", "uri0")
+ .SetSchema("type0")
+ .AddStringProperty("prop0", "foo")
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ ResultSpecProto result_spec;
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Put(simple_doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(simple_doc.namespace_(), simple_doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ // Reset should get icing back to a safe (empty) and working state.
+ EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
+ EXPECT_THAT(icing
+ .Get(simple_doc.namespace_(), simple_doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoIsOk());
+}
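+
+// A note on the mock pattern above: ON_CALL(...).WillByDefault(...) installs
+// fallback behavior without adding call-count expectations, so the test only
+// constrains what the filesystem does, not how often it is called. The same
+// gmock idiom in isolation:
+//
+//   auto fs = std::make_unique<MockFilesystem>();
+//   ON_CALL(*fs, CreateDirectoryRecursively)
+//       .WillByDefault([](const char* dir_name) { return true; });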
+
+TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) {
+ // Creates 3 test schemas
+ SchemaProto schema1 =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SchemaProto schema2 = SchemaProto(schema1);
+ *schema2.mutable_types(0)->add_properties() =
+ PropertyConfigBuilder()
+ .SetName("property2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ SchemaProto schema3 = SchemaProto(schema2);
+ *schema3.mutable_types(0)->add_properties() =
+ PropertyConfigBuilder()
+ .SetName("property3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that SetSchema() works right after Optimize()
+ EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+ search_spec1.set_query("m");
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Search() works right after Optimize()
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+    SearchResultProto search_result_proto2 =
+        icing.Search(search_spec2, GetDefaultScoringSpec(),
+                     ResultSpecProto::default_instance());
+    EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                          expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Verify term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+}
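+
+// Numeric search in the test above needs two opt-ins on the SearchSpecProto:
+// the experimental advanced-query parser and the numeric-search feature. The
+// minimal spec for an integer range query therefore looks like:
+//
+//   SearchSpecProto spec;
+//   spec.set_query("indexableInteger == 123");
+//   spec.set_search_type(
+//       SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+//   spec.add_enabled_features(std::string(kNumericSearchFeature));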
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ JoinShouldWorkAfterOptimizationDeleteParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message3 =
+ DocumentBuilder()
+ .SetKey("namespace", "message3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+  // Person1 is going to be deleted below. Only person2, which is joined with
+  // message3, should match the query.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message3;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+ // Delete parent document: person1
+ ASSERT_THAT(icing.Delete("namespace", "person1").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that join search query works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ JoinShouldWorkAfterOptimizationDeleteChild) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddStringProperty("senderQualifiedId", "namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message3 =
+ DocumentBuilder()
+ .SetKey("namespace", "message3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Prepare join search spec to join a query for `name:person` with a child
+ // query for `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+  // Message1 and message3 are going to be deleted below. Both person1 and
+  // person2 should be included even though person2 has no child (since we're
+  // doing a left join).
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() =
+ message2;
+  SearchResultProto::ResultProto* result_proto2 =
+      expected_search_result_proto.mutable_results()->Add();
+  *result_proto2->mutable_document() = person2;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+ // Delete child documents: message1 and message3
+ ASSERT_THAT(icing.Delete("namespace", "message1").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "message3").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that join search query works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
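+
+// In both join tests above, the join key is the child's "senderQualifiedId"
+// string property, which encodes the parent's key as "<namespace>#<uri>"
+// (e.g. "namespace#person1"). A hypothetical helper showing how these tests
+// assemble such a qualified id (ignoring any escaping a real qualified id
+// may require):
+//
+//   std::string MakeQualifiedId(const std::string& ns,
+//                               const std::string& uri) {
+//     return ns + "#" + uri;
+//   }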
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ IcingShouldWorkFineIfOptimizationIsAborted) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ {
+ // Initializes a normal icing to create files needed
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+ }
+
+  // Creates a mock filesystem in which DeleteDirectoryRecursively() always
+  // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and make
+  // it return ABORTED_ERROR.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("_optimize_tmp")))
+ .WillByDefault(Return(false));
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED));
+
+  // Now that optimization has been aborted, verify that document-related
+  // functions still work as expected.
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = message1;
+ EXPECT_THAT(icing.Get("namespace", "message1",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(message2).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message1;
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
+
+ SearchResultProto search_result_proto3 =
+ icing.Search(search_spec3, GetDefaultScoringSpec(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldRecoverIfFileDirectoriesAreMissing) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+  // Create a mock filesystem in which SwapFiles() always fails and deletes
+  // both directories. This causes IcingSearchEngine::OptimizeDocumentStore()
+  // to fail.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault([this](const char* one, const char* two) {
+ filesystem()->DeleteDirectoryRecursively(one);
+ filesystem()->DeleteDirectoryRecursively(two);
+ return false;
+ });
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Optimize() fails due to filesystem error
+ OptimizeResultProto result = icing.Optimize();
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+  // The index should be fully rebuilt because of the data loss.
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
+ Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
+
+  // The document is not found because the original file directory is missing.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+  expected_get_result_proto.mutable_status()->set_message(
+      "Document (namespace, uri1) not found.");
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+ DocumentProto new_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "new body")
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+  // Searching old content returns nothing because the original file directory
+  // is missing.
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+
+ // Searching new content returns the new document
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ new_document;
+ // Term search
+ search_spec1.set_query("n");
+ search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ search_spec2.set_query("indexableInteger == 456");
+  search_result_proto2 = icing.Search(search_spec2, GetDefaultScoringSpec(),
+                                      ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldRecoverIfDataFilesAreMissing) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+  // Create a mock filesystem in which SwapFiles() always fails and replaces
+  // both directories with empty ones. This causes
+  // IcingSearchEngine::OptimizeDocumentStore() to fail.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault([this](const char* one, const char* two) {
+ filesystem()->DeleteDirectoryRecursively(one);
+ filesystem()->CreateDirectoryRecursively(one);
+ filesystem()->DeleteDirectoryRecursively(two);
+ filesystem()->CreateDirectoryRecursively(two);
+ return false;
+ });
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Optimize() fails due to filesystem error
+ OptimizeResultProto result = icing.Optimize();
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+  // The index should be fully rebuilt because of the data loss.
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
+ Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
+
+  // The document is not found because the original files are missing.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+  expected_get_result_proto.mutable_status()->set_message(
+      "Document (namespace, uri1) not found.");
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+ DocumentProto new_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "new body")
+ .AddInt64Property("indexableInteger", 456)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("m");
+ search_spec1.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+  // Searching old content returns nothing because the original files are
+  // missing.
+ // Term search
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+  SearchResultProto search_result_proto2 =
+      icing.Search(search_spec2, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+
+ // Searching new content returns the new document
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ new_document;
+ // Term search
+ search_spec1.set_query("n");
+ search_result_proto1 = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Numeric (integer) search
+ search_spec2.set_query("indexableInteger == 456");
+  search_result_proto2 = icing.Search(search_spec2, GetDefaultScoringSpec(),
+                                      ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+                                        expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeThresholdTest) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 2)
+ .SetCreationTimestampMs(9000)
+ .SetTtlMs(500)
+ .Build();
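+  // With a creation time of 9000 ms and a TTL of 500 ms, document2 expires at
+  // 9500 ms, before the fake system time of 10000 ms set below, so Optimize()
+  // should count it as expired.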
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddInt64Property("indexableInteger", 3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  // Set the rebuild-index threshold to 0.9 to verify that the threshold
+  // controls whether Optimize() rebuilds or translates the index.
+ options.set_optimize_rebuild_index_threshold(0.9);
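+  // A rough sketch of the decision this threshold drives (an assumption, not
+  // the exact implementation): rebuild the index only when
+  //   (num_deleted + num_expired) / num_original >= threshold.
+  // In this test, (1 + 1) / 3 ~= 0.67 < 0.9, so the first Optimize() below is
+  // expected to translate the index rather than rebuild it.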
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Add three documents.
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
+ expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
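+  // For example, with a 4096-byte page, before_size = 8192 still passes
+  // against after_size = 12288, since 8192 >= 12288 - 4096.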
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ icing = std::make_unique<TestIcingSearchEngine>(
+ options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
+ expected.set_time_since_last_optimize_ms(10000);
+ expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
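+  // Nothing was removed this round (0 of 1 documents), so the removal ratio
+  // stays below the threshold and translation is expected again.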
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ // Delete the last document.
+ ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
+ ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(1);
+ expected.set_time_since_last_optimize_ms(0);
+ // Should rebuild the index since all documents are removed.
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .AddInt64Property("indexableInteger", 1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .AddInt64Property("indexableInteger", 2)
+ .SetCreationTimestampMs(9000)
+ .SetTtlMs(500)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body three")
+ .AddInt64Property("indexableInteger", 3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ // Use the default Icing options, so that a change to the default value will
+ // require updating this test.
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Add three documents.
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
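+  // With the default options, removing 2 of 3 documents presumably exceeds
+  // the default rebuild threshold, so a full index rebuild is expected.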
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ // Use the default Icing options, so that a change to the default value will
+ // require updating this test.
+ icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(0);
+ expected.set_time_since_last_optimize_ms(10000);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ // Delete the last document.
+ ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
+ ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(0);
+ expected.set_num_original_namespaces(1);
+ expected.set_num_deleted_namespaces(1);
+ expected.set_time_since_last_optimize_ms(0);
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationRewritesDocsWithNewCompressionLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_compression_level(3);
+ int64_t document_log_size_compression_3;
+ int64_t document_log_size_after_opti_no_compression;
+ int64_t document_log_size_after_opti_compression_3;
+ const std::string document_log_path =
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ document_log_size_compression_3 =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ // Turn off compression
+ icing_options.set_compression_level(0);
+
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Document log size is the same even after reopening with a different
+ // compression level
+ ASSERT_EQ(document_log_size_compression_3,
+ filesystem()->GetFileSize(document_log_path.c_str()));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ document_log_size_after_opti_no_compression =
+ filesystem()->GetFileSize(document_log_path.c_str());
+    // Document log size is larger after optimizing, since Optimize() rewrites
+    // the log with the new compression level (0, i.e. no compression)
+ ASSERT_GT(document_log_size_after_opti_no_compression,
+ document_log_size_compression_3);
+ }
+
+ // Restore the original compression level
+ icing_options.set_compression_level(3);
+
+ {
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Document log size is the same even after reopening with a different
+ // compression level
+ ASSERT_EQ(document_log_size_after_opti_no_compression,
+ filesystem()->GetFileSize(document_log_path.c_str()));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ document_log_size_after_opti_compression_3 =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ // Document log size should be the same as it was originally
+ ASSERT_EQ(document_log_size_after_opti_compression_3,
+ document_log_size_compression_3);
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_put_test.cc b/icing/icing-search-engine_put_test.cc
new file mode 100644
index 0000000..ed72f17
--- /dev/null
+++ b/icing/icing-search-engine_put_test.cc
@@ -0,0 +1,481 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
+// For mocking purposes, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test fixture covers all tests relating to IcingSearchEngine::Put.
+class IcingSearchEnginePutTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;
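+// This is 2^24 - 1 = 16,777,215 bytes, i.e. just under 16 MiB.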
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+TEST_F(IcingSearchEnginePutTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // A length of 1 is allowed - even though it would be strange to want
+ // this.
+ options.set_max_token_length(1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // "message" should have been truncated to "m"
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ // The indexed tokens were truncated to length of 1, so "m" will match
+ search_spec.set_query("m");
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // The query token is also truncated to length of 1, so "me"->"m" matches "m"
+ search_spec.set_query("me");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // The query token is still truncated to length of 1, so "massage"->"m"
+ // matches "m"
+ search_spec.set_query("massage");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEnginePutTest,
+ MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // Set token length to max. This is allowed (it just means never to
+ // truncate tokens). However, this does mean that tokens that exceed the
+ // size of the lexicon will cause indexing to fail.
+ options.set_max_token_length(std::numeric_limits<int32_t>::max());
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add a document that just barely fits under the max document limit.
+ // This will still fail to index because we won't actually have enough
+ // room in the lexicon to fit this content.
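+  // (The 256-byte margin presumably leaves headroom for the document key,
+  // schema type, and proto overhead, keeping the serialized document under
+  // the limit.)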
+ std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", std::move(enormous_string))
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("p");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutWithoutSchemaFailedPrecondition) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
+}
+
+TEST_F(IcingSearchEnginePutTest, IndexingDocMergeFailureResets) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    // Add one document. This one should remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+  // 2. Delete the index's idx subdirectory to trigger RestoreIndexIfNeeded.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+
+  // 3. Set up a mock filesystem that fails to grow the main index once.
+ bool has_failed_already = false;
+ auto open_write_lambda = [this, &has_failed_already](const char* filename) {
+ std::string main_lexicon_suffix = "/main-lexicon.prop.2";
+ std::string filename_string(filename);
+ if (!has_failed_already &&
+ filename_string.length() >= main_lexicon_suffix.length() &&
+ filename_string.substr(
+ filename_string.length() - main_lexicon_suffix.length(),
+ main_lexicon_suffix.length()) == main_lexicon_suffix) {
+ has_failed_already = true;
+ return -1;
+ }
+ return this->filesystem()->OpenForWrite(filename);
+ };
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ ON_CALL(*mock_icing_filesystem, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ // 4. Create the index again. This should trigger index restoration.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // Only the last document that was added should still be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
+ }
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogFunctionLatency) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogDocumentStoreStats) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
+ Eq(10));
+ size_t document_size = put_result_proto.put_document_stats().document_size();
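+  // The logged size may exceed the raw proto size because the stored document
+  // can include Icing-internal fields, hence the InternalFields upper bound
+  // below.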
+ EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
+ EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
+ sizeof(DocumentProto::InternalFields)));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexingStats) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
+ // No merge should happen.
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(0));
+  // The input document's body has 2 tokens: "message" and "body".
+ EXPECT_THAT(put_result_proto.put_document_stats()
+ .tokenization_stats()
+ .num_tokens_indexed(),
+ Eq(2));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexMergeLatency) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+
+ // Create an icing instance with index_merge_size = document1's size.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_index_merge_size(document1.ByteSizeLong());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Putting document2 should trigger an index merge.
+ PutResultProto put_result_proto = icing.Put(document2);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(10));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentIndexFailureDeletion) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Testing has shown that adding ~600,000 terms generated this way will
+ // fill up the hit buffer.
+ std::vector<std::string> terms = GenerateUniqueTerms(600000);
+ std::string content = absl_ports::StrJoin(terms, " ");
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "foo " + content)
+ .Build();
+  // Adding the document to the index fails partway through, so the document
+  // should be rejected from Icing entirely.
+ ASSERT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+
+ // Make sure that the document isn't searchable.
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+
+ SearchResultProto search_results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), IsEmpty());
+
+ // Make sure that the document isn't retrievable.
+ GetResultProto get_result =
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
+ ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
new file mode 100644
index 0000000..49c024e
--- /dev/null
+++ b/icing/icing-search-engine_schema_test.cc
@@ -0,0 +1,3159 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Return;
+
+// For mocking purposes, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test fixture covers all tests relating to IcingSearchEngine::GetSchema
+// and IcingSearchEngine::SetSchema.
+class IcingSearchEngineSchemaTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+// TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
+// SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
+// instances with them.
+
+TEST_F(IcingSearchEngineSchemaTest,
+ CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
+ // Create a type config with a circular reference.
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto* body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Person");
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
+
+ type = schema.add_types();
+ type->set_schema_type("Person");
+
+ body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Message");
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
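+  // The cycle: Message.recipient is a Person, Person.recipient is a Message,
+  // and nested-property indexing is enabled on both sides.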
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, FailToReadSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Successfully initialize and set a schema
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+
+ // This fails FileBackedProto::Read() when we try to check the schema we
+ // had previously set
+ ON_CALL(*mock_filesystem,
+ OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = test_icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status().message(),
+ HasSubstr("Unable to open file for read"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, FailToWriteSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails FileBackedProto::Write()
+ ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SetSchemaResultProto set_schema_result_proto =
+ icing.SetSchema(CreateMessageSchema());
+ EXPECT_THAT(set_schema_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(set_schema_result_proto.status().message(),
+ HasSubstr("Unable to open file for write"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleFails) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with properties { "title", "body"}
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 2. Add an email document
+ DocumentProto doc = DocumentBuilder()
+ .SetKey("emails", "email#1")
+ .SetSchema("Email")
+ .AddStringProperty("title", "Hello world.")
+ .AddStringProperty("body", "Goodnight Moon.")
+ .Build();
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 3. Set a schema that deletes email. This should fail.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Message");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ // 4. Try to delete by email type. This should succeed because email wasn't
+ // deleted in step 3.
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaIncompatibleForceOverrideSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with properties { "title", "body"}
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 2. Add an email document
+ DocumentProto doc = DocumentBuilder()
+ .SetKey("emails", "email#1")
+ .SetSchema("Email")
+ .AddStringProperty("title", "Hello world.")
+ .AddStringProperty("body", "Goodnight Moon.")
+ .Build();
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 3. Set a schema that removes the Email type with force override. This
+    // should succeed and delete the Email type.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Message");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+
+    // 4. Try to delete by the Email type. This should fail because Email was
+    // already deleted.
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaUnsetVersionIsZero) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // 1. Create a schema with an Email type without setting a version.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0));
+}
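+
+// A small sketch of the proto default-value behavior the test above relies
+// on: `version` is a plain integer field, so a type that never calls
+// set_version() reads back the default of 0. The helper is illustrative only.
+namespace {
+[[maybe_unused]] void VersionDefaultsToZeroSketch() {
+  SchemaTypeConfigProto type_config;
+  // No set_version() call was made, so the getter returns the proto default.
+  EXPECT_THAT(type_config.version(), Eq(0));
+}
+}  // namespace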
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaCompatibleVersionUpdateSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create a schema that adds a new optional property and updates the
+    // version.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_fully_compatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleVersionUpdateFails) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create a schema with an incompatible change (OPTIONAL -> REQUIRED).
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // 3. SetSchema should fail and the version number should NOT be updated.
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create a schema with an incompatible change (OPTIONAL -> REQUIRED)
+    // and force override set to true.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaNoChangeVersionUpdateSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create a schema that only changes the version.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaDuplicateTypesReturnsAlreadyExists) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with types { "Email", "Message" and "Email" }
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ type = schema.add_types();
+ type->set_schema_type("Message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ *schema.add_types() = schema.types(0);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaDuplicatePropertiesReturnsAlreadyExists) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with an Email type with properties { "title", "body" and
+ // "title" }
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
+}
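+
+// A minimal sketch (hypothetical client-side check, not part of the Icing
+// API) of the validation these two tests motivate: detecting duplicate type
+// names before calling SetSchema, which would otherwise fail with
+// ALREADY_EXISTS. Assumes <unordered_set> is included.
+namespace {
+[[maybe_unused]] bool HasDuplicateTypeNames(const SchemaProto& schema) {
+  std::unordered_set<std::string> seen;
+  for (const SchemaTypeConfigProto& type : schema.types()) {
+    if (!seen.insert(type.schema_type()).second) {
+      return true;  // This schema_type name appeared earlier in the schema.
+    }
+  }
+  return false;
+}
+}  // namespace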
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchema) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ auto message_document = CreateMessageDocument("namespace", "uri");
+
+ auto schema_with_message = CreateMessageSchema();
+
+ SchemaProto schema_with_email;
+ SchemaTypeConfigProto* type = schema_with_email.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SchemaProto schema_with_email_and_message = schema_with_email;
+ *schema_with_email_and_message.add_types() = CreateMessageSchemaTypeConfig();
+
+ // Create an arbitrary invalid schema
+ SchemaProto invalid_schema;
+ SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
+ empty_type->set_schema_type("");
+
+ // Make sure we can't set invalid schemas
+ SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema);
+ EXPECT_THAT(set_schema_result.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+  // Can add a document of a type in the set schema
+ set_schema_result = icing.SetSchema(schema_with_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+  // The Email-only schema doesn't include Message, so setting it would leave
+  // incompatible data behind
+ set_schema_result = icing.SetSchema(schema_with_email);
+ EXPECT_THAT(set_schema_result.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+  // Can expand the set of schema types and add a document of a new
+  // schema type
+ set_schema_result = icing.SetSchema(schema_with_email_and_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+  // Can't add a document whose schema type isn't in the set schema
+ auto photo_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Photo")
+ .AddStringProperty("creator", "icing")
+ .Build();
+ PutResultProto put_result_proto = icing.Put(photo_document);
+ EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(put_result_proto.status().message(),
+ HasSubstr("'Photo' not found"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedStringPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 2 properties:
+ // - 'a': string type, unindexed. No section id assigned.
+ // - 'b': int64 type, indexed. Section id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Schema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Schema")
+ .AddStringProperty("a", "message body")
+ .AddInt64Property("b", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ // Only 'b' will be indexed.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search: won't get anything.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("a:message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search: will get document.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("b == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Change the schema to:
+ // - 'a': string type, indexed. Section id = 0.
+ // - 'b': int64 type, indexed. Section id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_string_indexing_config()
+ ->set_term_match_type(TERM_MATCH_PREFIX);
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_string_indexing_config()
+ ->set_tokenizer_type(TOKENIZER_PLAIN);
+  // Index restoration should be triggered here because the new schema requires
+  // more properties to be indexed. New section ids will also be reassigned, and
+  // index restoration should rebuild using them.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search: will get document now.
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: will still get document.
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedIntegerPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 2 properties:
+ // - 'a': int64 type, unindexed. No section id assigned.
+ // - 'b': string type, indexed. Section id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Schema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Schema")
+ .AddInt64Property("a", 123)
+ .AddStringProperty("b", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ // Only 'b' will be indexed.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search: will get document.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("b:message");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: won't get anything.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("a == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Change the schema to:
+ // - 'a': int64 type, indexed. Section id = 0.
+ // - 'b': string type, indexed. Section id = 1.
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(0)
+ ->mutable_properties(0)
+ ->mutable_integer_indexing_config()
+ ->set_numeric_match_type(NUMERIC_MATCH_RANGE);
+  // Index restoration should be triggered here because the new schema requires
+  // more properties to be indexed. New section ids will also be reassigned, and
+  // index restoration should rebuild using them.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Schema");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search: will still get document.
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search: will get document now.
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
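+
+// A minimal sketch (hypothetical helper) of the in-place mutation pattern the
+// two tests above use to flip a property from unindexed to indexed: mutate
+// the indexing config on a copy of the schema, then re-apply it with
+// SetSchema to trigger index restoration. The integer analogue mutates
+// integer_indexing_config with NUMERIC_MATCH_RANGE instead.
+namespace {
+[[maybe_unused]] SchemaProto WithIndexedStringProperty(SchemaProto schema,
+                                                       int type_index,
+                                                       int property_index) {
+  auto* config = schema.mutable_types(type_index)
+                     ->mutable_properties(property_index)
+                     ->mutable_string_indexing_config();
+  config->set_term_match_type(TERM_MATCH_PREFIX);
+  config->set_tokenizer_type(TOKENIZER_PLAIN);
+  return schema;
+}
+}  // namespace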
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedDocumentPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with a nested document type:
+ //
+ // Section id assignment for 'Person':
+ // - "age": integer type, indexed. Section id = 0
+ // - "name": string type, indexed. Section id = 1.
+ // - "worksFor.name": string type, (nested) indexed. Section id = 2.
+ //
+ // Joinable property id assignment for 'Person':
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("worksFor")
+ .SetDataTypeDocument(
+ "Organization",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema_one).status(), ProtoIsOk());
+
+ DocumentProto list_document = DocumentBuilder()
+ .SetKey("namespace", "list/1")
+ .SetSchema("List")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("title", "title")
+ .Build();
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "person/2")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "John")
+ .AddInt64Property("age", 20)
+ .AddDocumentProperty(
+ "worksFor", DocumentBuilder()
+ .SetKey("namespace", "org/1")
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Google")
+ .AddStringProperty("listRef", "namespace#list/1")
+ .Build())
+ .Build();
+ EXPECT_THAT(icing.Put(list_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ person_document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("worksFor.name:Google");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: join a query for `title:title` (which will get
+ // list_document) with a child query for `name:John` (which will get
+ // person_document) based on the child's `worksFor.listRef` field.
+ SearchSpecProto search_spec_with_join;
+ search_spec_with_join.set_query("title:title");
+ search_spec_with_join.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_with_join.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("worksFor.listRef");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("name:John");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = result_spec;
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = list_document;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() =
+ person_document;
+
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+
+ // Change the schema to add another nested document property to 'Person'
+ //
+ // New section id assignment for 'Person':
+ // - "age": integer type, indexed. Section id = 0
+ // - "almaMater.name", string type, indexed. Section id = 1
+ // - "name": string type, indexed. Section id = 2
+ // - "worksFor.name": string type, (nested) indexed. Section id = 3
+ //
+ // New joinable property id assignment for 'Person':
+ // - "almaMater.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 1.
+ SchemaProto schema_two =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("worksFor")
+ .SetDataTypeDocument(
+ "Organization",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("almaMater")
+ .SetDataTypeDocument(
+ "Organization",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // This schema change is compatible since the added 'almaMater' property has
+ // CARDINALITY_OPTIONAL.
+ //
+  // Index restoration should be triggered here because the new schema requires
+  // more properties to be indexed. New section ids will also be reassigned, and
+  // index restoration should rebuild using them.
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Person");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search:
+ // Searching for "worksFor.name:Google" should still match document
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+  // In schema_two the 'name' property is now indexed at section id 2. If
+  // searching for "name:Google" matched the document, it would mean that the
+  // index rebuild was not triggered and Icing is still searching the old
+  // index, where 'worksFor.name' was indexed at section id 2.
+ search_spec1.set_query("name:Google");
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search: should still match document
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+  // Verify join search: should still be able to join by `worksFor.listRef`
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
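+
+// A minimal sketch (hypothetical helper) of the qualified-id format the join
+// tests above rely on: a Qualified Id joinable string property stores
+// "<namespace>#<uri>" of the referenced document, e.g. "namespace#list/1".
+// This ignores any escaping of '#' within the namespace or uri.
+namespace {
+[[maybe_unused]] std::string MakeQualifiedId(const std::string& name_space,
+                                             const std::string& uri) {
+  return name_space + "#" + uri;
+}
+}  // namespace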
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto person_proto =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ // Create a schema with nested properties:
+ // - "sender.age": int64 type, (nested) indexed. Section id = 0.
+ // - "sender.name": string type, (nested) indexed. Section id = 1.
+ // - "subject": string type, indexed. Section id = 2.
+ // - "timestamp": int64 type, indexed. Section id = 3.
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill Lundbergh")
+ .AddInt64Property("age", 20)
+ .Build())
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
+ // document should match a query for 'Bill' in 'sender.name', but not in
+ // 'subject'
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("sender.name:Bill");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document should match a query for 20 in 'sender.age', but not in
+ // 'timestamp'
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("sender.age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec2.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Now update the schema with index_nested_properties=false. This should
+  // reassign section ids, lead to an index rebuild, and ensure that nothing
+  // matches a query for "Bill" or 20.
+ // - "sender.age": int64 type, (nested) unindexed. No section id assigned.
+ // - "sender.name": string type, (nested) unindexed. No section id assigned.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp": int64 type, indexed. Section id = 1.
+ SchemaProto no_nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(no_nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // document shouldn't match a query for 'Bill' in either 'sender.name' or
+ // 'subject'
+ search_spec1.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document shouldn't match a query for 20 in either 'sender.age' or
+ // 'timestamp'
+ search_spec2.set_query("sender.age == 20");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec2.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ SetSchemaChangeNestedPropertiesListTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto person_proto =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("age")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("birthday")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ // Create a schema with nested properties:
+ // - "sender.address": string type, (nested) non-indexable. Section id = 0.
+ // - "sender.age": int64 type, (nested) indexed. Section id = 1.
+ // - "sender.birthday": int64 type, (nested) non-indexable. Section id = 2.
+ // - "sender.lastName": int64 type, (nested) indexed. Section id = 3.
+ // - "sender.name": string type, (nested) indexed. Section id = 4.
+ // - "subject": string type, indexed. Section id = 5.
+ // - "timestamp": int64 type, indexed. Section id = 6.
+ // - "sender.foo": unknown type, (nested) non-indexable. Section id = 7.
+ //
+ // "sender.address" and "sender.birthday" are assigned a section id because
+ // they are listed in the indexable_nested_properties_list for 'Email.sender'.
+  // They are assigned a section id but are not indexed since their indexing
+ // configs are non-indexable.
+ //
+ // "sender.foo" is also assigned a section id, but is also not undefined by
+ // the schema definition. Trying to index a document with this nested property
+ // should fail.
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*indexable_nested_properties_list=*/
+ {"age", "lastName", "address", "name", "birthday",
+ "foo"})
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill")
+ .AddStringProperty("lastName", "Lundbergh")
+ .AddStringProperty("address", "1600 Amphitheatre Pkwy")
+ .AddInt64Property("age", 20)
+ .AddInt64Property("birthday", 20)
+ .Build())
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+
+  // Indexing this doc should fail, since the 'sender.foo' property is not
+  // defined in the schema.
+ DocumentProto invalid_document =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill")
+ .AddStringProperty("lastName", "Lundbergh")
+ .AddStringProperty("address", "1600 Amphitheatre Pkwy")
+ .AddInt64Property("age", 20)
+ .AddInt64Property("birthday", 20)
+ .AddBytesProperty("foo", "bar bytes")
+ .Build())
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(invalid_document).status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ // Verify term search
+ // document should match a query for 'Bill' in 'sender.name', but not in
+ // 'sender.lastName'
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("sender.name:Bill");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec1.set_query("sender.lastName:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // document should match a query for 'Lundber' in 'sender.lastName', but not
+ // in 'sender.name'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("sender.lastName:Lundber");
+ search_spec2.set_term_match_type(TermMatchType::PREFIX);
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec2.set_query("sender.name:Lundber");
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // document should not match a query for 'Amphitheatre' because the
+ // 'sender.address' field is not indexed.
+ search_spec2.set_query("Amphitheatre");
+ search_spec2.set_term_match_type(TermMatchType::PREFIX);
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Verify numeric (integer) search
+ // document should match a query for 20 in 'sender.age', but not in
+ // 'timestamp' or 'sender.birthday'
+ SearchSpecProto search_spec3;
+ search_spec3.set_query("sender.age == 20");
+ search_spec3.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec3.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec3.set_query("timestamp == 20");
+ actual_results = icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec3.set_query("birthday == 20");
+ actual_results = icing.Search(search_spec3, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Now update the schema and don't index "sender.name", "sender.birthday" and
+ // "sender.foo".
+ // This should reassign section ids, lead to an index rebuild and ensure that
+ // nothing match a query for "Bill".
+ //
+ // Section id assignment:
+ // - "sender.address": string type, (nested) non-indexable. Section id = 0.
+ // - "sender.age": int64 type, (nested) indexed. Section id = 1.
+ // - "sender.birthday": int64 type, (nested) unindexed. No section id.
+ // - "sender.lastName": int64 type, (nested) indexed. Section id = 2.
+ // - "sender.name": string type, (nested) unindexed. No section id.
+ // - "subject": string type, indexed. Section id = 3.
+ // - "timestamp": int64 type, indexed. Section id = 4.
+ // - "sender.foo": unknown type, invalid. No section id.
+ SchemaProto nested_schema_with_less_props =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*indexable_nested_properties=*/
+ {"age", "lastName", "address"})
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(nested_schema_with_less_props);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // document shouldn't match a query for 'Bill' in either 'sender.name' or
+ // 'subject'
+ search_spec1.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec1.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewJoinablePropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Message" schema with 3 properties:
+ // - "subject": string type, non-joinable. No joinable property id assigned.
+ // It is indexed and used for searching only.
+ // - "receiverQualifiedId": string type, non-joinable. No joinable property id
+ // assigned.
+ // - "senderQualifiedId": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ SchemaProto schema_one =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Message");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person one")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person two")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("subject", "message")
+ .AddStringProperty("receiverQualifiedId", "namespace#person1")
+ .AddStringProperty("senderQualifiedId", "namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `receiverQualifiedId` field.
+ // Since "receiverQualifiedId" is not JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ // joining on that property should only return the "left-side" (`name:person`)
+ // of the join.
+ SearchSpecProto search_spec_join_by_receiver;
+ search_spec_join_by_receiver.set_query("name:person");
+ search_spec_join_by_receiver.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_join_by_receiver.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("receiverQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ SearchResultProto expected_empty_child_search_result_proto;
+ expected_empty_child_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ *expected_empty_child_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+ *expected_empty_child_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+ SearchResultProto actual_results = icing.Search(
+ search_spec_join_by_receiver, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_empty_child_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `senderQualifiedId` field.
+ // Since "senderQualifiedId" is JOINABLE_VALUE_TYPE_QUALIFIED_ID, joining on
+ // that property should return both "left-side" (`name:person`) and
+ // "right-side" (`subject:message`) of the join.
+ SearchSpecProto search_spec_join_by_sender = search_spec_join_by_receiver;
+ join_spec = search_spec_join_by_sender.mutable_join_spec();
+ join_spec->set_child_property_expression("senderQualifiedId");
+
+ SearchResultProto expected_join_by_sender_search_result_proto;
+ expected_join_by_sender_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_by_sender_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person2;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ *expected_join_by_sender_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person1;
+ actual_results = icing.Search(search_spec_join_by_sender,
+ GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_sender_search_result_proto));
+
+ // Change "Message" schema to:
+ // - "subject": string type, non-joinable. No joinable property id assigned.
+ // - "receiverQualifiedId": string type, Qualified Id joinable. Joinable
+ // property id = 0.
+ // - "senderQualifiedId": string type, Qualified Id joinable. Joinable
+ // property id = 1.
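+ // (Judging by the expected ids above, joinable property ids appear to be
+ // assigned in alphabetical order of the joinable properties' names.)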
+ SchemaProto schema_two = schema_one;
+ schema_two.mutable_types(1)
+ ->mutable_properties(1)
+ ->mutable_joinable_config()
+ ->set_value_type(JOINABLE_VALUE_TYPE_QUALIFIED_ID);
+ // Index restoration should be triggered here because the new schema requires
+ // more joinable properties. New joinable property ids will also be
+ // reassigned, and index restoration should use them to rebuild.
+ set_schema_result = icing.SetSchema(schema_two);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `receiverQualifiedId` field.
+ // Since we've changed "receiverQualifiedId" to be
+ // JOINABLE_VALUE_TYPE_QUALIFIED_ID, joining on that property should now
+ // return both the "left-side" (`name:person`) and "right-side"
+ // (`subject:message`) of the join.
+ SearchResultProto expected_join_by_receiver_search_result_proto;
+ expected_join_by_receiver_search_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ result_proto =
+ expected_join_by_receiver_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person1;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ *expected_join_by_receiver_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+ actual_results = icing.Search(search_spec_join_by_receiver,
+ GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_receiver_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:message` based on the child's `senderQualifiedId` field. We should
+ // get the same set of results since `senderQualifiedId` is unchanged.
+ actual_results = icing.Search(search_spec_join_by_sender,
+ GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_by_sender_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaWithValidCycle_circularSchemaDefinitionNotAllowedFails) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_allow_circular_schema_definitions(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create schema with circular type definitions: A <-> B
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)))
+ .Build();
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaWithValidCycle_allowCircularSchemaDefinitionsOK) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_allow_circular_schema_definitions(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create schema with valid circular type definitions: A <-> B, B->A sets
+ // index_nested_properties=false
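+ // (Together with the neighboring tests, the rule appears to be that a cycle
+ // is valid only if at least one edge in it sets
+ // index_nested_properties=false, so nested-property indexing stays bounded.)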
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)))
+ .Build();
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::OK));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaWithInvalidCycle_allowCircularSchemaDefinitionsFails) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_allow_circular_schema_definitions(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create schema with invalid circular type definitions: A <-> B, all edges
+ // set index_nested_properties=true
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true)))
+ .Build();
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaIndexedPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 4 properties:
+ // - "body": string type, indexed. Section id = 0.
+ // - "subject": string type, indexed. Section id = 1.
+ // - "timestamp1": int64 type, indexed. Section id = 2.
+ // - "timestamp2": int64 type, indexed. Section id = 3.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp1")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp2")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only the "subject" and "timestamp2" properties.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddInt64Property("timestamp2", 1234)
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("subject:tps");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp2'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("timestamp2 == 1234");
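+ // Numeric comparison queries require the advanced query parser and the
+ // numeric search feature to be enabled.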
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the 'body' and 'timestamp1' fields. This is
+ // backwards incompatible, but the document should be preserved because it
+ // doesn't contain a 'body' or 'timestamp1' field.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp2": int64 type, indexed. Section id = 1.
+ //
+ // If the index is not correctly rebuilt, then the hits of 'subject' and
+ // 'timestamp2' in the index will still have old section ids of 1, 3 and
+ // therefore they won't be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp2")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec1.set_query("subject:tps");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp2'.
+ search_spec2.set_query("timestamp2 == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaJoinablePropertyDeletionTriggersIndexRestoration) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Email" schema with 2 joinable properties:
+ // - "receiverQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 1.
+ SchemaProto email_with_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_receiver_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person = DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "person")
+ .Build();
+ // Create an email document with only the "senderQualifiedId" joinable
+ // property.
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .Build();
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should be
+ // able to join person and email documents by this property.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = email;
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("name:person");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:tps");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the "receiverQualifiedId" field. This is
+ // backwards incompatible, but the document should be preserved because it
+ // doesn't contain a "receiverQualifiedId" field. Also, since the change is
+ // join incompatible, we have to rebuild the join index.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 0.
+ //
+ // If the index is not correctly rebuilt, then the joinable data of
+ // "senderQualifiedId" in the join index will still have old joinable property
+ // id of 1 and therefore won't take effect for join search query.
+ SchemaProto email_without_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Although we've just deleted the existing property "receiverQualifiedId"
+ // from the "Email" schema, the email document will still be preserved because
+ // it doesn't have a "receiverQualifiedId" property.
+ set_schema_result =
+ icing.SetSchema(email_without_receiver_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should
+ // still be able to join person and email documents by this property.
+ actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaIndexedPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with 3 properties:
+ // - "body": string type, indexed. Section id = 0.
+ // - "subject": string type, indexed. Section id = 1.
+ // - "timestamp": int64 type, indexed. Section id = 2.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only the "subject" and "timestamp" properties.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddInt64Property("timestamp", 1234)
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("subject:tps");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("timestamp == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the 'body' field and add a 'to' field. This
+ // is backwards incompatible, but the document should be preserved because it
+ // doesn't contain a 'body' field and 'to' is OPTIONAL.
+ // - "subject": string type, indexed. Section id = 0.
+ // - "timestamp": int64 type, indexed. Section id = 1.
+ // - "to": string type, indexed. Section id = 2.
+ //
+ // If the index is not correctly rebuilt, then the hits of 'subject' and
+ // 'timestamp' in the index will still have old section ids of 1, 2 and
+ // therefore they won't be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("to")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify term search
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec1.set_query("subject:tps");
+ actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ // We should be able to retrieve the document by searching for 1234 in
+ // 'timestamp'.
+ search_spec2.set_query("timestamp == 1234");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaJoinablePropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create "Email" schema with 2 joinable properties:
+ // - "receiverQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 1.
+ SchemaProto email_with_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiverQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_receiver_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto person = DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "person")
+ .Build();
+ // Create an email document with only the "subject" and "senderQualifiedId"
+ // properties.
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddStringProperty("senderQualifiedId", "namespace#person")
+ .Build();
+
+ EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should be
+ // able to join person and email documents by this property.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = email;
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("name:person");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("subject:tps");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Now update the schema to remove the "receiverQualifiedId" field and add
+ // "zQualifiedId". This is backwards incompatible, but the document should
+ // be preserved because it doesn't contain a "receiverQualifiedId" field and
+ // "zQualifiedId" is OPTIONAL.
+ // - "senderQualifiedId": qualified id joinable. Joinable property id = 0.
+ // - "zQualifiedId": qualified id joinable. Joinable property id = 1.
+ //
+ // If the index is not correctly rebuilt, then the joinable data of
+ // "senderQualifiedId" in the join index will still have old joinable property
+ // id of 1 and therefore won't take effect for join search query.
+ SchemaProto email_without_receiver_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("zQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_without_receiver_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_join_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `subject:tps` based on the child's `senderQualifiedId` field. We should
+ // still be able to join person and email documents by this property.
+ actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto email_schema_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument("Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create two documents, a person document and an email document. Both should
+ // be deleted when we remove the 'company' field from the person type.
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "Bill Lundbergh")
+ .AddStringProperty("company", "Initech Corp.")
+ .Build();
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender", person_document)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ // We should be able to retrieve both documents.
+ GetResultProto get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(person_document));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(email_document));
+
+ // Now update the schema to remove the 'company' field. This is backwards
+ // incompatible, and *both* documents should be deleted because both fail
+ // validation (each contains 'Person' data with a property that no longer
+ // exists in the schema).
+ nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ nested_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Both documents should be deleted now.
+ get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema_with_optional_subject;
+ auto type = schema_with_optional_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add an OPTIONAL property
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(),
+ ProtoIsOk());
+
+ DocumentProto email_document_without_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "without_subject")
+ .SetSchema("email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto email_document_with_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "with_subject")
+ .SetSchema("email")
+ .AddStringProperty("subject", "foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk());
+
+ SchemaProto schema_with_required_subject;
+ type = schema_with_required_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add a REQUIRED property
+ property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(schema_with_required_subject);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result_proto;
+ expected_set_schema_result_proto.mutable_status()->set_code(
+ StatusProto::FAILED_PRECONDITION);
+ expected_set_schema_result_proto.mutable_status()->set_message(
+ "Schema is incompatible.");
+ expected_set_schema_result_proto.add_incompatible_schema_types("email");
+
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
+
+ // Force set it
+ set_schema_result =
+ icing.SetSchema(schema_with_required_subject,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result_proto.mutable_status()->clear_message();
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document_with_subject;
+
+ EXPECT_THAT(icing.Get("namespace", "with_subject",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // The document without a subject got deleted because it failed validation
+ // against the new schema
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, without_subject) not found.");
+ expected_get_result_proto.clear_document();
+
+ EXPECT_THAT(icing.Get("namespace", "without_subject",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaDeletesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ type = schema.add_types();
+ type->set_schema_type("message");
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Clear the schema and only add the "email" type, essentially deleting the
+ // "message" type
+ SchemaProto new_schema;
+ type = new_schema.add_types();
+ type->set_schema_type("email");
+
+ // Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_result;
+ expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ expected_result.mutable_status()->set_message("Schema is incompatible.");
+ expected_result.add_deleted_schema_types("message");
+
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
+
+ // Force set it
+ set_schema_result =
+ icing.SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_result.mutable_status()->set_code(StatusProto::OK);
+ expected_result.mutable_status()->clear_message();
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
+
+ // "email" document is still there
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document;
+
+ EXPECT_THAT(icing.Get("namespace", "email_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // "message" document got deleted
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, message_uri) not found.");
+ expected_get_result_proto.clear_document();
+
+ EXPECT_THAT(icing.Get("namespace", "message_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
+ EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeFailedPrecondition) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ GetSchemaTypeResultProto get_schema_type_result_proto =
+ icing.GetSchemaType("nonexistent_schema");
+ EXPECT_THAT(get_schema_type_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(get_schema_type_result_proto.status().message(),
+ HasSubstr("Schema not set"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ GetSchemaTypeResultProto expected_get_schema_type_result_proto;
+ expected_get_schema_type_result_proto.mutable_status()->set_code(
+ StatusProto::OK);
+ *expected_get_schema_type_result_proto.mutable_schema_type_config() =
+ CreateMessageSchema().types(0);
+ EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()),
+ EqualsProto(expected_get_schema_type_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Make an incompatible schema: a previously OPTIONAL field is now REQUIRED
+ SchemaProto incompatible_schema = schema;
+ incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
+
+ // Since we don't have any documents yet, we can't detect this edge case. But
+ // it should be fine since there aren't any documents to be invalidated.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaCanDetectPreviousSchemaWasLost) {
+ SchemaTypeConfigProto message_schema_type_config =
+ CreateMessageSchemaTypeConfig();
+ message_schema_type_config.mutable_properties(0)->set_cardinality(
+ CARDINALITY_OPTIONAL);
+
+ SchemaProto schema;
+ *schema.add_types() = message_schema_type_config;
+
+ // Make an incompatible schema: a previously OPTIONAL field is now REQUIRED
+ SchemaProto incompatible_schema = schema;
+ incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Can retrieve by namespace/uri
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Can search for it
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
+
+ // Setting the new, different schema will remove incompatible documents
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
+
+ // Can't retrieve by namespace/uri
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Can't search for it
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, IcingShouldWorkFor64Sections) {
+ // Create a schema with 64 sections
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ // Person has 4 sections.
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ // Email has 16 sections: 4 string properties plus 3
+ // Person-typed properties x 4 nested sections each.
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("date")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("receiver")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("cc")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ // EmailCollection has 64 sections: 4 Email-typed
+ // properties x 16 nested sections each.
+ .SetType("EmailCollection")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email1")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email2")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email3")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email4")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-001")
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-002")
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-003")
+ .Build();
+ DocumentProto email1 = DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-01")
+ .AddStringProperty("time", "1:00 PM")
+ .AddDocumentProperty("sender", person1)
+ .AddDocumentProperty("receiver", person2)
+ .AddDocumentProperty("cc", person3)
+ .Build();
+ DocumentProto email2 = DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-02")
+ .AddStringProperty("time", "2:00 PM")
+ .AddDocumentProperty("sender", person2)
+ .AddDocumentProperty("receiver", person1)
+ .AddDocumentProperty("cc", person3)
+ .Build();
+ DocumentProto email3 = DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-03")
+ .AddStringProperty("time", "3:00 PM")
+ .AddDocumentProperty("sender", person3)
+ .AddDocumentProperty("receiver", person1)
+ .AddDocumentProperty("cc", person2)
+ .Build();
+ DocumentProto email4 = DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-04")
+ .AddStringProperty("time", "4:00 PM")
+ .AddDocumentProperty("sender", person3)
+ .AddDocumentProperty("receiver", person2)
+ .AddDocumentProperty("cc", person1)
+ .Build();
+ DocumentProto email_collection =
+ DocumentBuilder()
+ .SetKey("namespace", "email_collection")
+ .SetSchema("EmailCollection")
+ .AddDocumentProperty("email1", email1)
+ .AddDocumentProperty("email2", email2)
+ .AddDocumentProperty("email3", email3)
+ .AddDocumentProperty("email4", email4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ const std::vector<std::string> query_terms = {
+ "first1", "last2", "email3@gmail.com", "000-000-001",
+ "body", "subject", "2022-08-02", "3\\:00"};
+ SearchResultProto expected_document;
+ expected_document.mutable_status()->set_code(StatusProto::OK);
+ *expected_document.mutable_results()->Add()->mutable_document() =
+ email_collection;
+ for (const std::string& query_term : query_terms) {
+ search_spec.set_query(query_term);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document));
+ }
+
+ search_spec.set_query("foo");
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, IcingShouldReturnErrorForExtraSections) {
+ // Create a schema with more sections than allowed.
+ SchemaTypeConfigBuilder schema_type_config_builder =
+ SchemaTypeConfigBuilder().SetType("type");
+ for (int i = 0; i <= kMaxSectionId + 1; ++i) {
+ schema_type_config_builder.AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop" + std::to_string(i))
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ }
+ SchemaProto schema =
+ SchemaBuilder().AddType(schema_type_config_builder).Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status().message(),
+ HasSubstr("Too many properties to be indexed"));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
new file mode 100644
index 0000000..21512c6
--- /dev/null
+++ b/icing/icing-search-engine_search_test.cc
@@ -0,0 +1,7173 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/result/result-state-manager.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/snippet-helpers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::DoubleEq;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::IsEmpty;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+// For mocking purposes, we allow tests to provide a custom Filesystem and
+// Clock.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test fixture is meant to cover all tests relating to
+// IcingSearchEngine::Search and IcingSearchEngine::GetNextPage.
+class IcingSearchEngineSearchTest
+ : public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+      // If we've specified the reverse-JNI or CFString method for
+      // segmentation (i.e. not ICU), then no ICU data file is bundled and
+      // there is nothing to set up. Technically, we could use reverse-JNI
+      // for segmentation AND include an ICU data file, but that seems
+      // unlikely and our current BUILD setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so that the creation timestamp isn't overridden with the
+// current time.
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ icing_options.set_document_store_namespace_id_fingerprint(true);
+ icing_options.set_use_new_qualified_id_join_index(true);
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+}
+
+DocumentProto CreateEmailDocument(const std::string& name_space,
+ const std::string& uri, int score,
+ const std::string& subject_content,
+ const std::string& body_content) {
+ return DocumentBuilder()
+ .SetKey(name_space, uri)
+ .SetSchema("Email")
+ .SetScore(score)
+ .AddStringProperty("subject", subject_content)
+ .AddStringProperty("body", body_content)
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+SchemaProto CreatePersonAndEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+std::vector<std::string> GetUrisFromSearchResults(
+    const SearchResultProto& search_result_proto) {
+  std::vector<std::string> result_uris;
+  result_uris.reserve(search_result_proto.results_size());
+  for (int i = 0; i < search_result_proto.results_size(); i++) {
+    result_uris.push_back(search_result_proto.results(i).document().uri());
+  }
+  return result_uris;
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsValidResults) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
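+  // With num_to_snippet = 1, only the first result will be snippeted; the
+  // second result's snippet entries should be empty.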
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
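+  // Both documents have the same default score and creation timestamp, so
+  // results are returned in descending document id order: document_two, the
+  // more recently inserted, comes first.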
+ const DocumentProto& document = results.results(0).document();
+ EXPECT_THAT(document, EqualsProto(document_two));
+
+ const SnippetProto& snippet = results.results(0).snippet();
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
+
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
+
+ search_spec.set_query("foo");
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+  // Rank by DOCUMENT_SCORE and ensure that the score field is populated with
+  // the document score.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).score(), 93);
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).score(), 15);
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+  // Rank by CREATION_TIMESTAMP and ensure that the score field is populated
+  // with the creation timestamp.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(0).score(), 12000);
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(1).score(), 10000);
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult) {
+ auto fake_clock = std::make_unique<FakeClock>();
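+  // Every timer created by the fake clock reports 1000ms of elapsed time,
+  // which is what the latency fields in query_stats are verified against
+  // below.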
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+
+ EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .scoring_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_documents_scored(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_lite_index(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_main_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_integer_index(),
+ Eq(0));
+
+ // The token is a random number so we don't verify it.
+ expected_search_result_proto.set_next_page_token(
+ search_result_proto.next_page_token());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult_readOnlyFalse) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
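+  // Disable the read-only search path; the results and stats should be
+  // identical to the read-only case above.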
+ search_spec.set_use_read_only_search(false);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+
+ EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .scoring_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_documents_scored(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_lite_index(),
+ Eq(2));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_main_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_integer_index(),
+ Eq(0));
+
+ // The token is a random number so we don't verify it.
+ expected_search_result_proto.set_next_page_token(
+ search_result_proto.next_page_token());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchZeroResultLimitReturnsEmptyResults) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(0);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchZeroResultLimitReturnsEmptyResults_readOnlyFalse) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+ search_spec.set_use_read_only_search(false);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(0);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithNumToScore) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(10);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(5);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(10);
+ result_spec.set_num_to_score(10);
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ SearchResultProto expected_search_result_proto1;
+ expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
+ document_one;
+ *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
+ document_two;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto1));
+
+ result_spec.set_num_to_score(1);
+  // By setting num_to_score = 1, only document_two will be scored, ranked,
+  // and returned.
+  // - The num_to_score cutoff depends only on the reading order from the
+  //   posting list. In other words, since posting lists are read in
+  //   descending doc id order, ScoringProcessor scores documents with higher
+  //   doc ids first and cuts off once num_to_score is reached.
+  // - Therefore, even though document_one has the higher score,
+  //   ScoringProcessor still skips it: the posting list yields document_two
+  //   first, and scoring stops after document_two because the number of
+  //   scored documents has already reached num_to_score.
+  SearchResultProto expected_search_result_proto2;
+  expected_search_result_proto2.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
+      document_two;
+
+  search_result_proto =
+      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto2));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeResultLimitReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(-5);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_per_page cannot be negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeResultLimitReturnsInvalidArgument_readOnlyFalse) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+ search_spec.set_use_read_only_search(false);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(-5);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_per_page cannot be negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
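+  // The threshold must be positive, so both -1 and 0 are rejected below.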
+ result_spec.set_num_total_bytes_per_page_threshold(-1);
+
+ SearchResultProto actual_results1 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results1.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+ result_spec.set_num_total_bytes_per_page_threshold(0);
+ SearchResultProto actual_results2 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results2.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeMaxJoinedChildrenPerParentReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_max_joined_children_per_parent_to_return(-1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.max_joined_children_per_parent_to_return cannot be "
+ "negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNonPositiveNumToScoreReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_to_score(-1);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_to_score cannot be non-positive.");
+
+ SearchResultProto actual_results1 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ result_spec.set_num_to_score(0);
+ SearchResultProto actual_results2 =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPersistenceReturnsValidResults) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Set the schema up beforehand.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ // Schema will be persisted to disk when icing goes out of scope.
+ }
+
+ {
+ // Ensure that icing initializes the schema and section_manager
+ // properly from the pre-existing file.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+ // The index and document store will be persisted to disk when icing goes
+ // out of scope.
+ }
+
+ {
+ // Ensure that the index is brought back up without problems and we
+ // can query for the content that we expect.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec.set_query("foo");
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+ }
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnEmpty) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ // Empty result, no next-page token
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+
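+  // Timer-based latencies report the fake elapsed time of 1000ms, except for
+  // document retrieval and ranking, which never ran because nothing matched.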
+ EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
+ Eq(1000));
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .parse_query_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .scoring_latency_ms(),
+ Eq(1000));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_documents_scored(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_lite_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_main_index(),
+ Eq(0));
+ EXPECT_THAT(search_result_proto.query_stats()
+ .parent_search_stats()
+ .num_fetched_hits_integer_index(),
+ Eq(0));
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnMultiplePages) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+
+ // Searches and gets the first page, 2 results
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+  // Since the token is a random number, we don't need to verify it.
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Second page, 2 results
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Third page, 1 result
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ // Because there are no more results, we should not return the next page
+ // token.
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // No more results
+ expected_search_result_proto.clear_results();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithNoScoringShouldReturnMultiplePages) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+
+ // Searches and gets the first page, 2 results
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+  // Since the token is a random number, we don't need to verify it.
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Second page, 2 results
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Third page, 1 result
+ expected_search_result_proto.clear_results();
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ // Because there are no more results, we should not return the next page
+ // token.
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // No more results
+ expected_search_result_proto.clear_results();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithUnknownEnabledFeatureShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features("BAD_FEATURE");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest, ShouldReturnMultiplePagesWithSnippets) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
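+  // With num_to_snippet = 3 and num_per_page = 2, both results on the first
+  // page are snippeted, only the first result on the second page is
+  // snippeted, and nothing on the third page is.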
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ const DocumentProto& document_result_1 = search_result.results(0).document();
+ EXPECT_THAT(document_result_1, EqualsProto(document5));
+ const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_1.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &document_result_1, snippet_result_1.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
+ ElementsAre("message"));
+
+ const DocumentProto& document_result_2 = search_result.results(1).document();
+ EXPECT_THAT(document_result_2, EqualsProto(document4));
+ const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
+ EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_2,
+ snippet_result_2.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
+ ElementsAre("message"));
+
+  // Second page, 2 results with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ const DocumentProto& document_result_3 = search_result.results(0).document();
+ EXPECT_THAT(document_result_3, EqualsProto(document3));
+ const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_3,
+ snippet_result_3.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
+ ElementsAre("message"));
+
+ EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, ShouldInvalidateNextPageToken) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+
+ // Searches and gets the first page, 1 result
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t next_page_token = search_result_proto.next_page_token();
+  // Since the token is a random number, we don't need to verify it.
+ expected_search_result_proto.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+  // At this point, document1 remains to be fetched.
+
+ // Invalidates token
+ icing.InvalidateNextPageToken(next_page_token);
+
+ // Tries to fetch the second page, no result since it's invalidated
+ expected_search_result_proto.clear_results();
+ expected_search_result_proto.clear_next_page_token();
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchIncludesDocumentsBeforeTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Time just has to be less than the document's creation timestamp (100) + the
+ // document's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(400);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document is returned as part of search results
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchDoesntIncludeDocumentsPastTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Time just has to be greater than the document's creation timestamp (100) +
+ // the document's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(700);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document is not returned as part of search results
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWorksAfterSchemaTypesCompatiblyModified) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ auto property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .AddStringProperty("body", "foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Make sure we can search for message document
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // The message isn't indexed, so we get nothing
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // With just the schema type filter, we can search for the message
+ search_spec.Clear();
+ search_spec.add_schema_type_filters("message");
+
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message_document;
+
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+  // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
+  // will force a change in the DocumentStore's cached SchemaTypeIds.
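+  // (Previously "message" was the only type and had SchemaTypeId 0; now
+  // "email" is added first, so "message" becomes SchemaTypeId 1.)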
+ schema.clear_types();
+ type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ // Adding a new indexed property will require reindexing
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ search_spec.Clear();
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.add_schema_type_filters("message");
+
+ // We can still search for the message document
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+  // Intentionally inserts the documents in an order different from their
+  // score order.
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending score order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWorksForNestedSubtypeDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Company").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("employee")
+ .SetDataTypeDocument("Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a company with a person and an artist.
+ DocumentProto document_company =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Company")
+ .AddDocumentProperty("employee",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "name_person")
+ .Build(),
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "name_artist")
+ .AddStringProperty("emailAddress", "email")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document_company).status(), ProtoIsOk());
+
+ SearchResultProto company_search_result_proto;
+ company_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *company_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document_company;
+
+ SearchResultProto empty_search_result_proto;
+ empty_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ // "name_person" should match the company.
+ search_spec.set_query("name_person");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ company_search_result_proto));
+
+ // "name_artist" should match the company.
+ search_spec.set_query("name_artist");
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ company_search_result_proto));
+
+ // "email" should not match the company even though the artist has a matched
+ // property. This is because the "employee" property is defined as Person
+ // type, and indexing on document properties should be based on defined types,
+ // instead of subtypes.
+ search_spec.set_query("email");
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ empty_search_result_proto));
+}
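+
+// A worked view of the restriction above, assuming indexable sections are
+// derived from the declared property type (the paths are illustrative):
+//
+//   "employee" is declared as Person, so Company's indexable sections are
+//   computed from Person's definition:
+//     employee.name          -> indexed (declared on Person)
+//     employee.emailAddress  -> never indexed (exists only on Artist)
+//
+// The Artist value still stores "email"; it simply never reaches the index.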
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldAllowNoScoring) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Creates 3 documents and ensures that their relationship is:
+  // document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(1571111111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(1572222222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(1573333333333)
+ .Build();
+
+  // Intentionally inserts the documents in an order different from their
+  // score order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+
+ // Results should not be ranked by score but returned in reverse insertion
+ // order.
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
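+
+// One way to read "reverse insertion order": with RankingStrategy::NONE,
+// results come back by descending DocumentId, and DocumentIds grow with each
+// Put (an assumption about Icing internals, not asserted by this test):
+//
+//   put order:  document3 -> id 0, document1 -> id 1, document2 -> id 2
+//   descending: document2, document1, document3  // the expected order above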
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldBeRankedByCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Creates 3 documents and ensures the relationship in terms of creation
+  // timestamp is: document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(1571111111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(1572222222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(1573333333333)
+ .Build();
+
+  // Intentionally inserts the documents in an order different from their
+  // score order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending timestamp order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByUsageCount) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in a different order to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 >
+ // doc1 when ranked by USAGE_TYPE1_COUNT.
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending USAGE_TYPE1_COUNT order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageCounts) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldBeRankedByUsageTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in a different order to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc2 and doc3. The order will be doc3 > doc2 > doc1 when
+ // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP.
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // Both doc5 and doc7 have "coffee" in name and text sections.
+ // However, doc5 has more matches in the text section.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace1/uri6")); // 'food' 1 time
+}
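+
+// The IDF gap above can be made concrete. Using the textbook BM25 IDF
+// idf(t) = ln(1 + (N - n_t + 0.5) / (n_t + 0.5)) (an assumption made here
+// for intuition only, not a claim about Icing's exact formula), with N = 8
+// documents:
+//
+//   "coffee": n_t = 2  ->  ln(1 + 6.5 / 2.5) = ln(3.6) ~= 1.28
+//   "food":   n_t = 4  ->  ln(1 + 4.5 / 4.5) = ln(2.0) ~= 0.69
+//
+// so a "coffee" hit is worth roughly twice a "food" hit, which is why the
+// coffee documents lead the ranking above.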
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespaceAdvanced) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1");
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // Both doc5 and doc7 have "coffee" in name and text sections.
+ // However, doc5 has more matches in the text section.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace1/uri6")); // 'food' 1 time
+}
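+
+// Note the expected order is identical to the plain RELEVANCE_SCORE test
+// above: f(x) = 2x + 1 is strictly increasing, so it rescales every score
+// without changing any pairwise comparison:
+//
+//   for any scores a, b:  a > b  =>  2a + 1 > 2b + 1
+//
+// Any strictly increasing advanced scoring expression over
+// this.relevanceScore() would rank these documents the same way.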
+
+TEST_P(IcingSearchEngineSearchTest,
+ Bm25fRelevanceScoringOneNamespaceNotOperator) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
+ "thin-crust pizza. good and fast. nice coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee -starbucks");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ GetUrisFromSearchResults(search_result_proto),
+      ElementsAre("namespace1/uri5",   // 'coffee' 3 times, 'starbucks' 0 times
+                  "namespace1/uri3"));  // 'coffee' 1 time, 'starbucks' 0 times
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ Bm25fRelevanceScoringOneNamespaceSectionRestrict) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document =
+ CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18,
+ "peets coffee, best coffee",
+ "espresso. decaf. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri7", /*score=*/4, "starbucks",
+ "habit. birthday rewards. good coffee. brewed coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("subject:coffee OR body:food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // The term frequencies of "coffee" and "food" are calculated respectively
+ // from the subject section and the body section.
+ // Documents with "food" are ranked lower as the term "food" is commonly
+ // present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(
+ GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
+ "namespace1/uri1", // 'food' 2 times in section body
+ "namespace1/uri4", // 'food' 2 times in section body
+ "namespace1/uri2", // 'food' 1 time in section body
+ "namespace1/uri6")); // 'food' 1 time in section body
+}
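+
+// A compact view of the section restricts above, assuming term frequency is
+// tallied only inside the named section (consistent with the comment in the
+// test):
+//
+//   query: "subject:coffee OR body:food"
+//   uri5:  subject "peets coffee, best coffee" -> tf("coffee") = 2, matches
+//          body "... excellent coffee."        -> ignored by subject:
+//   uri7:  subject "starbucks"                 -> tf("coffee") = 0
+//          body "good coffee. brewed coffee"   -> ignored as well
+//
+// so uri7 drops out entirely despite two body hits, and uri5 ranks first.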
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringTwoNamespaces) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace2".
+ document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+ "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+ "starbucks coffee", "good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ ResultSpecProto result_spec_proto;
+ result_spec_proto.set_num_per_page(16);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec_proto);
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  // The two corpora have the same documents except for document 7, which in
+  // "namespace2" is much shorter than the average document length, so it is
+  // boosted.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
+ "namespace1/uri5", // 'coffee' 3 times
+ "namespace2/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace2/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace2/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace2/uri2", // 'food' 1 time
+ "namespace1/uri6", // 'food' 1 time
+ "namespace2/uri6")); // 'food' 1 time
+}
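+
+// The short-document boost is BM25's length normalization at work. A sketch
+// of the per-term saturation factor, assuming the textbook parameters
+// k1 = 1.2 and b = 0.75 (this test does not assert Icing's actual constants):
+//
+//   factor(tf, dl) = tf * (k1 + 1) / (tf + k1 * (1 - b + b * dl / avgdl))
+//
+// Holding tf fixed, dl < avgdl shrinks the denominator and raises the
+// factor, so namespace2/uri7 ("good coffee", tf = 2, far below average
+// length) can outrank the uri5 documents (tf = 3, average length).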
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringWithNamespaceFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace2".
+ document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+ "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+ "starbucks coffee", "good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+  // Now restrict the query to documents in "namespace2".
+ search_spec.add_namespace_filters("namespace2");
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result from namespace "namespace2" should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // Both doc5 and doc7 have "coffee" in name and text sections.
+  // Even though doc5 has more matches in the text section, doc7 is much
+  // shorter than the corpus's average document length, so it is boosted.
+  // Documents with "food" are ranked lower because the term "food" is common
+  // in this corpus and thus has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
+ "namespace2/uri5", // 'coffee' 3 times
+ "namespace2/uri1", // 'food' 2 times
+ "namespace2/uri4", // 'food' 2 times
+ "namespace2/uri2", // 'food' 1 time
+ "namespace2/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedAscendingly) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+  // Intentionally inserts the documents in an order different from their
+  // score order.
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in ascending score order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingDuplicateNamespaceShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify "namespace1" twice. This should result in an error.
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ entry = result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingDuplicateSchemaShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify "Message" twice. This should result in an error.
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("nonexistentMessage");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingDuplicateNamespaceAndSchemaShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify "namespace1xMessage" twice. This should result in an error.
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonPositiveMaxResultsShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // Specify zero max_results. This should result in an error.
+ ResultSpecProto result_spec;
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(0);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+  entry = result_grouping->add_entry_groupings();
+  entry->set_namespace_("namespace2");
+  entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+  // Specify negative max_results. This should also result in an error.
+  result_spec.mutable_result_groupings(0)->set_max_results(-1);
+  search_result_proto = icing.Search(search_spec, scoring_spec, result_spec);
+  EXPECT_THAT(search_result_proto.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingMultiNamespaceGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Creates 6 documents and ensures the relationship in terms of document
+  // score is: document1 < document2 < document3 < document4 < document5 <
+  // document6
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(5)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document6 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(6)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ // "m" will match all 6 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // The lowest-scored document in "namespace1" (document1) should not be
+ // included because that grouping is capped at one result. "namespace2" and
+ // "namespace3" are grouped together, so only the two highest-scored
+ // documents between the two (both of which are in "namespace3") should be
+ // returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document6;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultGroupingMultiSchemaGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetScore(1)
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetScore(2)
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri3")
+ .SetSchema("Message")
+ .SetScore(3)
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "f" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("f");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("Email");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // The highest-scored document of each schema type, "Message" (document3)
+ // and "Email" (document1), should be returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingMultiNamespaceAndSchemaGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 6 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(5)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document6 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(6)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ // "m" will match all 6 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+ entry->set_schema("Message");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // The highest-scored document in each of "namespace1xMessage" (document2),
+ // "namespace2xMessage" (document4), and "namespace3xMessage" (document6)
+ // should be returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document6;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonexistentNamespaceShouldBeIgnored) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("nonexistentNamespace");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Only the top-ranked document in "namespace1" (document2) should be
+ // returned. The presence of "nonexistentNamespace" in the same result
+ // grouping should have no effect.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonexistentSchemaShouldBeIgnored) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("nonexistentMessage");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Only the top-ranked document in "Message" (document2) should be
+ // returned. The presence of "nonexistentMessage" in the same result
+ // grouping should have no effect.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 4 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message4")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+ // "m" will match all 2 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("namespace1");
+ entry->set_schema("nonexistentMessage");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // Only the top-ranked document in "namespace2xMessage" (document4) should
+ // be returned. The presence of "namespace1xnonexistentMessage" in the same
+ // result grouping should have no effect: if either the namespace or the
+ // schema type is nonexistent, the entire entry is ignored.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document4;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetNormalization) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "MDI zurich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "mdi Zürich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("mdi Zürich");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
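+ // Snippet the top 2 results, returning up to 2 matches per property, each
+ // with a surrounding window of at most 64 UTF-32 code units.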
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ const DocumentProto& result_document_1 = results.results(0).document();
+ const SnippetProto& result_snippet_1 = results.results(0).snippet();
+ EXPECT_THAT(result_document_1, EqualsProto(document_two));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
+
+ const DocumentProto& result_document_2 = results.results(1).document();
+ const SnippetProto& result_snippet_2 = results.results(1).snippet();
+ EXPECT_THAT(result_document_2, EqualsProto(document_one));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetNormalizationPrefix) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "MDI zurich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "mdi Zürich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("md Zür");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ const DocumentProto& result_document_1 = results.results(0).document();
+ const SnippetProto& result_snippet_1 = results.results(0).snippet();
+ EXPECT_THAT(result_document_1, EqualsProto(document_two));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
+
+ const DocumentProto& result_document_2 = results.results(1).document();
+ const SnippetProto& result_snippet_2 = results.results(1).snippet();
+ EXPECT_THAT(result_document_2, EqualsProto(document_one));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetSectionRestrict) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "MDI zurich Team Meeting")
+ .AddStringProperty("body", "MDI zurich Team Meeting")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "MDI zurich trip")
+ .AddStringProperty("body", "Let's travel to zurich")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("body:Zür");
+ search_spec->set_search_type(GetParam());
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+ result_spec->set_num_per_page(1);
+ result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec->mutable_snippet_spec()->set_num_matches_per_property(10);
+ result_spec->mutable_snippet_spec()->set_num_to_snippet(10);
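+ // num_per_page = 1 pushes the second matching document onto a second page,
+ // fetched below via GetNextPage.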
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+
+ const DocumentProto& result_document_two = results.results(0).document();
+ const SnippetProto& result_snippet_two = results.results(0).snippet();
+ EXPECT_THAT(result_document_two, EqualsProto(document_two));
+ EXPECT_THAT(result_snippet_two.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_two, result_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+ ElementsAre("Let's travel to zurich"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+ ElementsAre("zurich"));
+
+ search_spec.reset();
+ scoring_spec.reset();
+ result_spec.reset();
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+
+ const DocumentProto& result_document_one = results.results(0).document();
+ const SnippetProto& result_snippet_one = results.results(0).snippet();
+ EXPECT_THAT(result_document_one, EqualsProto(document_one));
+ EXPECT_THAT(result_snippet_one.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_one,
+ result_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+ ElementsAre("MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+ ElementsAre("zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, Hyphens) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("MyType");
+ PropertyConfigProto* prop = type->add_properties();
+ prop->set_property_name("foo");
+ prop->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ prop->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("MyType")
+ .AddStringProperty("foo", "foo bar-baz bat")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("MyType")
+ .AddStringProperty("foo", "bar for baz bat-man")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("foo:bar-baz");
+ search_spec.set_search_type(GetParam());
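+ // The PLAIN tokenizer splits on hyphens, so "bar-baz" is treated as the two
+ // terms "bar" and "baz"; both documents contain both terms in "foo".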
+
+ ResultSpecProto result_spec;
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionEmptyFieldPath) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and use an empty field
+ // mask to request NO properties.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("hello");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec.set_num_per_page(1);
+ TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("");
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the returned results contain no properties.
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and request only
+ // 'sender.name' and 'subject' properties.
+ // Create all of search_spec, result_spec and scoring_spec as objects with
+ // scope that will end before the call to GetNextPage to ensure that the
+ // implementation isn't relying on references to any of them.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec->set_num_per_page(1);
+ TypePropertyMask* email_field_mask = result_spec->add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("sender.name");
+ email_field_mask->add_paths("subject");
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the first returned result only contains the 'sender.name'
+ // and 'subject' properties.
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ // 4. Now, delete all of the specs used in the search. GetNextPage should have
+ // no problem because it shouldn't be keeping any references to them.
+ search_spec.reset();
+ result_spec.reset();
+ scoring_spec.reset();
+
+ // 5. Verify that the second returned result only contains the 'sender.name'
+ // and 'subject' properties.
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned. Although 'hello' is
+ // present in document_two, that document shouldn't be in the result since
+ // 'hello' only appears in its 'body' property, which is not in the
+ // specified property filter.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmptySearchWithPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Verify that both documents are returned.
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmptySearchWithEmptyPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ // Add empty list for Email's property filters
+ email_property_filters->set_schema_type("Email");
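+ // With no add_paths() calls, Email's filter list stays empty. Since the
+ // query itself is empty, there are no terms to restrict, so both documents
+ // still match.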
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Verify that both documents are returned.
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Add Person and Organization schema with a property 'name' in both.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 1. Add person document
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build();
+ ASSERT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ // 2. Add organization document
+ DocumentProto organization_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Meg Corp")
+ .AddStringProperty("address", "Universal street")
+ .Build();
+ ASSERT_THAT(icing.Put(organization_document).status(), ProtoIsOk());
+
+ // 3. Issue a query with property filters. The Person schema has 'name' in
+ // its property filter but the Organization schema doesn't.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Meg");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* person_property_filters =
+ search_spec->add_type_property_filters();
+ person_property_filters->set_schema_type("Person");
+ person_property_filters->add_paths("name");
+ TypePropertyMask* organization_property_filters =
+ search_spec->add_type_property_filters();
+ organization_property_filters->set_schema_type("Organization");
+ organization_property_filters->add_paths("address");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 4. Verify that only the person document is returned. Although 'Meg' is
+ // present in the organization document, it shouldn't be in the result since
+ // the 'name' field is not specified in the Organization property filter.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(person_document));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in either of the fields
+ // specified in the property filter. This confirms that the property filters
+ // for the wildcard entry have been applied to the Email schema as well.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type plus property filters of sender.name and body for
+ // the Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("body");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the second document is returned since the first
+ // document doesn't contain the word 'hello' in either of the fields
+ // sender.name or body. This confirms that the property filters specified
+ // for the Email schema have been applied and the ones specified for the
+ // wildcard entry have been ignored.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for an
+ // unknown schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("unknown");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ // 3. Verify that both documents are returned since each of them has the
+ // word 'hello' in at least one property. The second document being returned
+ // confirms that the body field was searched and the specified property
+ // filters were not applied to the Email schema type.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithEmptyPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // 1. Add one message document
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with empty property filter for Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any properties to be searched.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilterHavingInvalidProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // 1. Add one message document
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter having invalid/unknown property for
+ // Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+ message_property_filters->add_paths("unknown");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any valid properties to be searched. Any
+ // invalid/unknown properties specified in the property filters will be
+ // ignored while searching.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersWithNesting) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter of sender.emailAddress for the
+ // Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.emailAddress");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in sender.emailAddress. The first
+ // document being returned confirms that the nested property
+ // sender.emailAddress was actually searched.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilter_RelevanceScoreUnaffectedByExcludedSectionHits) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Hello Ryan")
+ .AddStringProperty("emailAddress", "hello@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello Hello!")
+ .AddStringProperty("body", "hello1 hello2 hello3 hello4 hello5")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "world@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello Hello!")
+ .AddStringProperty("body", "one1 two2 three3 four4 five5")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with a property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Verify that both documents are returned and have equal relevance
+ // scores. Note that the total number of tokens must be equal in the two
+ // documents for the scores to match.
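+ // (Hedged rationale, assuming Icing's RELEVANCE_SCORE is BM25F-style and
+ // normalizes by document length: equal token counts keep the normalization
+ // terms identical, so the filtered-out body hits are the only variable.)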
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ scoring_spec->set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).score(), DoubleEq(results.results(1).score()));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilter_ExcludingSectionsWithHitsLowersRelevanceScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add an email document
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Hello Ryan")
+ .AddStringProperty("emailAddress", "hello@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello Hello!")
+ .AddStringProperty("body", "hello hello hello hello hello")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query without property filter
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Hello");
+ search_spec->set_search_type(GetParam());
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ // 3. Get the relevance score without property filter
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ scoring_spec->set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ double original_relevance_score = results.results(0).score();
+
+ // 4. Relevance score with property filter should be lower
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("subject");
+ results = icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).score(), Lt(original_relevance_score));
+}
+
+TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+
+ // Set index merge size to 6 hits. This will cause document1, document2, and
+ // document3's hits to be merged into the main index, while document4 and
+ // document5's hits remain in the lite index.
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(sizeof(TermIdHitPair::Value) * 6);
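+ // Hedged worked arithmetic (assuming each message document indexes two
+ // hits, one for "message" and one for "body"): after document3 the lite
+ // index holds 3 * 2 = 6 hits, the merge fires, and document4/document5's
+ // 4 hits stay in the lite index. That is what the expectations below of
+ // 3 main-index hits and 2 lite-index hits for the term "message" rely on.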
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.add_namespace_filters("namespace");
+ search_spec.add_schema_type_filters(document1.schema());
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
+
+ // Check the stats
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(7);
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(1);
+ exp_stats.set_num_schema_types_filtered(1);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_documents_scored(5);
+ exp_stats.set_num_results_with_snippets(2);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+
+ QueryStatsProto::SearchStats* exp_parent_search_stats =
+ exp_stats.mutable_parent_search_stats();
+ exp_parent_search_stats->set_query_length(7);
+ exp_parent_search_stats->set_num_terms(1);
+ exp_parent_search_stats->set_num_namespaces_filtered(1);
+ exp_parent_search_stats->set_num_schema_types_filtered(1);
+ exp_parent_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_parent_search_stats->set_num_documents_scored(5);
+ exp_parent_search_stats->set_parse_query_latency_ms(5);
+ exp_parent_search_stats->set_scoring_latency_ms(5);
+ exp_parent_search_stats->set_num_fetched_hits_lite_index(2);
+ exp_parent_search_stats->set_num_fetched_hits_main_index(3);
+ exp_parent_search_stats->set_num_fetched_hits_integer_index(0);
+
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Second page, 2 results with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_results_with_snippets(1);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+
+ // Set index merge size to 13 hits. This will cause person1, person2, email1,
+ // email2, and email3's hits to be merged into the main index, while person3
+ // and email4's hits remain in the lite index.
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(sizeof(TermIdHitPair::Value) * 13);
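+ // Hedged worked arithmetic (assuming 2 indexed hits per Person, firstName
+ // and lastName, since emailAddress has no indexing config here, and 3 hits
+ // per Email subject "test subject N"): person1, person2, and email1..email3
+ // contribute 2*2 + 3*3 = 13 hits, so the merge fires there and person3
+ // (2 hits) plus email4 (3 hits) stay in the lite index, matching the
+ // 2-main/1-lite parent and 3-main/1-lite child fetched-hit stats below.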
+
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(0)
+ .Build();
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ scoring_spec.set_order_by(ScoringSpecProto::Order::DESC);
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Since we:
+ // - Use COUNT for aggregation scoring strategy.
+ // - (Default) use DOCUMENT_SCORE to score child documents.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person1 with [email1, email2, email4] should have the highest aggregated
+ // score (3) and be returned first. person2 with [email3] (aggregated score =
+ // 1) should be the second, and person3 with no child (aggregated score = 0)
+ // should be the last.
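+ //
+ // Worked restatement of COUNT aggregation: score(person1) =
+ // |{email1, email2, email4}| = 3, score(person2) = |{email3}| = 1,
+ // score(person3) = |{}| = 0, which fixes the DESC page order below.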
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email2;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email4;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto2 =
+ expected_result2.mutable_results()->Add();
+ *result_proto2->mutable_document() = person2;
+ *result_proto2->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto expected_result3;
+ expected_result3.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto3 =
+ expected_result3.mutable_results()->Add();
+ *result_proto3->mutable_document() = person3;
+
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = search_result.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ ASSERT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ // Check the stats
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(15);
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_documents_scored(3);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(3);
+ exp_stats.set_join_latency_ms(5);
+ exp_stats.set_is_join_query(true);
+
+ QueryStatsProto::SearchStats* exp_parent_search_stats =
+ exp_stats.mutable_parent_search_stats();
+ exp_parent_search_stats->set_query_length(15);
+ exp_parent_search_stats->set_num_terms(1);
+ exp_parent_search_stats->set_num_namespaces_filtered(0);
+ exp_parent_search_stats->set_num_schema_types_filtered(0);
+ exp_parent_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE);
+ exp_parent_search_stats->set_num_documents_scored(3);
+ exp_parent_search_stats->set_parse_query_latency_ms(5);
+ exp_parent_search_stats->set_scoring_latency_ms(5);
+ exp_parent_search_stats->set_num_fetched_hits_lite_index(1);
+ exp_parent_search_stats->set_num_fetched_hits_main_index(2);
+ exp_parent_search_stats->set_num_fetched_hits_integer_index(0);
+
+ QueryStatsProto::SearchStats* exp_child_search_stats =
+ exp_stats.mutable_child_search_stats();
+ exp_child_search_stats->set_query_length(12);
+ exp_child_search_stats->set_num_terms(1);
+ exp_child_search_stats->set_num_namespaces_filtered(0);
+ exp_child_search_stats->set_num_schema_types_filtered(0);
+ exp_child_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ exp_child_search_stats->set_num_documents_scored(4);
+ exp_child_search_stats->set_parse_query_latency_ms(5);
+ exp_child_search_stats->set_scoring_latency_ms(5);
+ exp_child_search_stats->set_num_fetched_hits_lite_index(1);
+ exp_child_search_stats->set_num_fetched_hits_main_index(3);
+ exp_child_search_stats->set_num_fetched_hits_integer_index(0);
+
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Second page, 1 child doc.
+ search_result = icing.GetNextPage(next_page_token);
+ next_page_token = search_result.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result2.set_next_page_token(next_page_token);
+ EXPECT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(1);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 0 child docs.
+ search_result = icing.GetNextPage(next_page_token);
+ next_page_token = search_result.next_page_token();
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(1);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_results_with_snippets(0);
+ ASSERT_THAT(search_result,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result3));
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), IsEmpty());
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = QueryStatsProto();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetErrorTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetScore(10)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cats", "I like dogs",
+ "I like birds", "I like fish")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetScore(20)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like red", "I like green",
+ "I like blue", "I like yellow")
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetScore(5)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cupcakes", "I like donuts",
+ "I like eclairs", "I like froyo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.add_schema_type_filters("Generic");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("like");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(3);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4);
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ ASSERT_THAT(search_results.results(), SizeIs(3));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri2"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ std::string_view content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ EXPECT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ EXPECT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(1);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ ASSERT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ ASSERT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(2);
+ ASSERT_THAT(result->document().uri(), Eq("uri3"));
+ ASSERT_THAT(result->snippet().entries(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, CJKSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
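+ // Hedged illustration (assumes C++17 constexpr std::string_view): every
+ // character above is 3 UTF-8 bytes, so the segment "走路" spans bytes
+ // [9, 15), exactly the byte offsets the snippet match below must report.
+ static_assert(std::string_view("走").size() == 3,
+ "CJK characters here are 3 UTF-8 bytes each");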
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kChinese)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("走");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "body" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kChinese));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("走路"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, InvalidToEmptyQueryTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "Luca Brasi sleeps with the 🐟🐟🐟."
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF8 idx: 0 5 11 18 23 27 31 35 39
+ // UTF16 idx: 0 5 11 18 23 27 29 31 33
+ // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
+ // and "🐟".
+ constexpr std::string_view kSicilianMessage =
+ "Luca Brasi sleeps with the 🐟🐟🐟.";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kSicilianMessage)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "Some other content.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("?");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec;
+ ResultSpecProto result_spec;
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.results(), IsEmpty());
+ } else {
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query("。");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.results(), IsEmpty());
+ } else {
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query("-");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query(":");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query("OR");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+
+ search_spec.set_query(" ");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmojiSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "Luca Brasi sleeps with the 🐟🐟🐟."
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF8 idx: 0 5 11 18 23 27 31 35 39
+ // UTF16 idx: 0 5 11 18 23 27 29 31 33
+ // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
+ // and "🐟".
+ constexpr std::string_view kSicilianMessage =
+ "Luca Brasi sleeps with the 🐟🐟🐟.";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kSicilianMessage)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "Some other content.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("🐟");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "body" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kSicilianMessage));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("🐟"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey(R"(pkg$db/name#space\\)", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId",
+ R"(pkg$db/name\#space\\\\#person3)") // escaped
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
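+ // Hedged note on the escaping above: '#' separates namespace from uri in a
+ // qualified id, so a literal '#' or '\' inside the namespace must be
+ // escaped. Unescaping email3's personQualifiedId ("\#" -> "#", "\\" -> "\")
+ // yields the namespace pkg$db/name#space\\ (two literal trailing
+ // backslashes) plus uri person3, which matches person3's key exactly.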
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Since we:
+ // - Use MAX for aggregation scoring strategy.
+ // - (Default) use DOCUMENT_SCORE to score child documents.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person1 + email1 should have the highest aggregated score (3) and be
+ // returned first. person2 + email2 (aggregated score = 2) should be the
+ // second, and person3 + email3 (aggregated score = 1) should be the last.
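+ //
+ // Worked restatement of MAX aggregation: score(person1) = max{3} = 3,
+ // score(person2) = max{2} = 2, score(person3) = max{1} = 1, each parent
+ // taking its highest-scoring joined child's document score.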
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto2 =
+ expected_result2.mutable_results()->Add();
+ *result_proto2->mutable_document() = person2;
+ *result_proto2->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result3;
+ expected_result3.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto3 =
+ expected_result3.mutable_results()->Add();
+ *result_proto3->mutable_document() = person3;
+ *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ EXPECT_THAT(result1,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result2.set_next_page_token(next_page_token);
+ EXPECT_THAT(result2,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+
+ SearchResultProto result3 = icing.GetNextPage(next_page_token);
+ next_page_token = result3.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result3,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result3));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedIdMultipleNamespaces) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace1", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace2", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace1#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace1#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace2#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ // Since we:
+ // - Use COUNT for aggregation scoring strategy.
+ // - (Default) use DESC as the ranking order.
+ //
+ // pkg$db/namespace1#person + email1, email2 should have the highest
+ // aggregated score (2) and be returned first. pkg$db/namespace2#person +
+ // email3 (aggregated score = 1) should be the second.
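+ //
+ // Worked joining detail: email1 and email2 both reference
+ // "pkg$db/namespace1#person" even though they live in different namespaces
+ // themselves; the join key is the personQualifiedId value, not the child's
+ // own namespace, so COUNT(person1) = 2 and COUNT(person2) = 1.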
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto2 =
+ expected_result2.mutable_results()->Add();
+ *result_proto2->mutable_document() = person2;
+ *result_proto2->mutable_joined_results()->Add()->mutable_document() = email3;
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ EXPECT_THAT(result1,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result2,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ JoinShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(100)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(99)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(98)
+ .Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(97)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec with max_joined_children_per_parent_to_return = 2
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(2);
+
+ // - Use COUNT for aggregation scoring strategy.
+ // - max_joined_children_per_parent_to_return = 2.
+ // - (Default) use DESC as the ranking order.
+ //
+  // person2 should have the highest aggregated score (3) since email2, email3,
+  // and email4 are joined to it, giving a COUNT aggregated score of 3.
+  // However, only email2 and email3 should be attached to person2 due to the
+  // max_joined_children_per_parent_to_return limit in result_spec.
+  // person1 should rank second (aggregated score = 1).
+ SearchResultProto::ResultProto expected_result_proto1;
+ *expected_result_proto1.mutable_document() = person2;
+ expected_result_proto1.set_score(3);
+ SearchResultProto::ResultProto* child_result_proto1 =
+ expected_result_proto1.mutable_joined_results()->Add();
+ *child_result_proto1->mutable_document() = email2;
+ child_result_proto1->set_score(99);
+  SearchResultProto::ResultProto* child_result_proto2 =
+      expected_result_proto1.mutable_joined_results()->Add();
+  *child_result_proto2->mutable_document() = email3;
+  child_result_proto2->set_score(98);
+
+  SearchResultProto::ResultProto expected_result_proto2;
+  *expected_result_proto2.mutable_document() = person1;
+  expected_result_proto2.set_score(1);
+  SearchResultProto::ResultProto* child_result_proto3 =
+      expected_result_proto2.mutable_joined_results()->Add();
+ *child_result_proto3->mutable_document() = email1;
+ child_result_proto3->set_score(100);
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ EXPECT_THAT(result1.results(),
+ ElementsAre(EqualsProto(expected_result_proto1)));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result2.results(),
+              ElementsAre(EqualsProto(expected_result_proto2)));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinWithZeroMaxJoinedChildPerParent) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(100)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(99)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(98)
+ .Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(97)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec with max_joined_children_per_parent_to_return = 0
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(0);
+
+ // - Use COUNT for aggregation scoring strategy.
+ // - max_joined_children_per_parent_to_return = 0.
+ // - (Default) use DESC as the ranking order.
+ //
+  // person2 should have the highest aggregated score (3) since email2, email3,
+  // and email4 are joined to it, giving a COUNT aggregated score of 3.
+  // However, no child documents should be attached to person2 due to the
+  // max_joined_children_per_parent_to_return limit in result_spec.
+  // person1 should rank second (aggregated score = 1), also with no attached
+  // child documents.
+ SearchResultProto::ResultProto expected_result_proto1;
+ *expected_result_proto1.mutable_document() = person2;
+ expected_result_proto1.set_score(3);
+
+  SearchResultProto::ResultProto expected_result_proto2;
+  *expected_result_proto2.mutable_document() = person1;
+  expected_result_proto2.set_score(1);
+
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ EXPECT_THAT(result1.results(),
+ ElementsAre(EqualsProto(expected_result_proto1)));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result2.results(),
+              ElementsAre(EqualsProto(expected_result_proto2)));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinSnippet) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first")
+ .AddStringProperty("lastName", "last")
+ .AddStringProperty("emailAddress", "email@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ // Child ResultSpec (with snippet)
+ ResultSpecProto* nested_result_spec = nested_spec->mutable_result_spec();
+ nested_result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
+ nested_result_spec->mutable_snippet_spec()->set_num_matches_per_property(1);
+ nested_result_spec->mutable_snippet_spec()->set_num_to_snippet(1);
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec (without snippet)
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
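+  // Snippeting is requested only on the child (Email) side; since the parent
+  // ResultSpec leaves its snippet spec unset, parent results should carry no
+  // snippet entries.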
+
+ SearchResultProto result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ ASSERT_THAT(result.results(), SizeIs(1));
+ // Check parent doc (person).
+ const DocumentProto& result_parent_document = result.results(0).document();
+ EXPECT_THAT(result_parent_document, EqualsProto(person));
+ EXPECT_THAT(result.results(0).snippet().entries(), IsEmpty());
+
+ // Check child doc (email).
+ ASSERT_THAT(result.results(0).joined_results(), SizeIs(1));
+ const DocumentProto& result_child_document =
+ result.results(0).joined_results(0).document();
+ const SnippetProto& result_child_snippet =
+ result.results(0).joined_results(0).snippet();
+ EXPECT_THAT(result_child_document, EqualsProto(email));
+ ASSERT_THAT(result_child_snippet.entries(), SizeIs(1));
+ EXPECT_THAT(result_child_snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content = GetString(
+ &result_child_document, result_child_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet.entries(0)),
+ ElementsAre("test subject"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet.entries(0)),
+ ElementsAre("test"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinProjection) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ DocumentProto person =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first")
+ .AddStringProperty("lastName", "last")
+ .AddStringProperty("emailAddress", "email@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ DocumentProto email =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ // Child ResultSpec (with projection)
+ ResultSpecProto* nested_result_spec = nested_spec->mutable_result_spec();
+ TypePropertyMask* type_property_mask =
+ nested_result_spec->add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("subject");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec (with projection)
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Person");
+ type_property_mask->add_paths("emailAddress");
+
+ SearchResultProto result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ ASSERT_THAT(result.results(), SizeIs(1));
+ // Check parent doc (person): should contain only the "emailAddress" property.
+ DocumentProto projected_person_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person")
+ .SetSchema("Person")
+ .AddStringProperty("emailAddress", "email@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ EXPECT_THAT(result.results().at(0).document(),
+ EqualsProto(projected_person_document));
+
+ // Check child doc (email): should contain only the "subject" property.
+ ASSERT_THAT(result.results(0).joined_results(), SizeIs(1));
+ DocumentProto projected_email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ EXPECT_THAT(result.results(0).joined_results(0).document(),
+ EqualsProto(projected_email_document));
+}
+
+TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ const int32_t person1_doc_score = 10;
+ const int32_t person2_doc_score = 25;
+ const int32_t person3_doc_score = 123;
+ const int32_t email1_doc_score = 10;
+ const int32_t email2_doc_score = 15;
+ const int32_t email3_doc_score = 40;
+
+ // person1 has children email1 and email2.
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person1_doc_score)
+ .Build();
+  // person2 has a single child: email3.
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person2_doc_score)
+ .Build();
+  // person3 has no children.
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(person3_doc_score)
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email1_doc_score)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email2_doc_score)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(email3_doc_score)
+ .Build();
+
+  // Set the child scoring expression and compute the expected child scores.
+ ScoringSpecProto child_scoring_spec = GetDefaultScoringSpec();
+ child_scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ child_scoring_spec.set_advanced_scoring_expression(
+ "this.documentScore() * 2 + 1");
+ const int32_t exp_email1_score = email1_doc_score * 2 + 1;
+ const int32_t exp_email2_score = email2_doc_score * 2 + 1;
+ const int32_t exp_email3_score = email3_doc_score * 2 + 1;
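+  // With the document scores above: exp_email1_score = 21,
+  // exp_email2_score = 31, and exp_email3_score = 81.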
+
+  // Set the parent scoring expression and compute the expected parent scores.
+ ScoringSpecProto parent_scoring_spec = GetDefaultScoringSpec();
+ parent_scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ parent_scoring_spec.set_advanced_scoring_expression(
+ "this.documentScore() * sum(this.childrenRankingSignals())");
+ const int32_t exp_person1_score =
+ person1_doc_score * (exp_email1_score + exp_email2_score);
+ const int32_t exp_person2_score = person2_doc_score * exp_email3_score;
+ const int32_t exp_person3_score = person3_doc_score * 0;
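+  // That is: exp_person1_score = 10 * (21 + 31) = 520, exp_person2_score =
+  // 25 * 81 = 2025, and exp_person3_score = 0 since person3 has no matching
+  // children.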
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ *nested_spec->mutable_scoring_spec() = child_scoring_spec;
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results =
+ icing.Search(search_spec, parent_scoring_spec, result_spec);
+ uint64_t next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person2"));
+ // exp_person2_score = 2025
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person2_score));
+
+ results = icing.GetNextPage(next_page_token);
+ next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person1"));
+ // exp_person1_score = 520
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person1_score));
+
+ results = icing.GetNextPage(next_page_token);
+ next_page_token = results.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("person3"));
+ // exp_person3_score = 0
+ EXPECT_THAT(results.results(0).score(), Eq(exp_person3_score));
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterAdvancedQuerySucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
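+  // Numeric comparison queries are only supported by the advanced query
+  // processor with the numeric search feature enabled; see
+  // NumericFilterOldQueryFails below for the raw-query rejection case.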
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+
+ search_spec.set_query("price == 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("cost > 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_three));
+
+ search_spec.set_query("price <= 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_F(IcingSearchEngineSearchTest,
+ NumericFilterAdvancedQueryWithPersistenceSucceeds) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // Schema will be persisted to disk when icing goes out of scope.
+ }
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .AddInt64Property("cost", 2)
+ .Build();
+ {
+ // Ensure that icing initializes the schema and section_manager
+ // properly from the pre-existing file.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+ // The index and document store will be persisted to disk when icing goes
+ // out of scope.
+ }
+
+ {
+ // Ensure that the index is brought back up without problems and we
+ // can query for the content that we expect.
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+
+ search_spec.set_query("price == 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("cost > 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+
+ search_spec.set_query("price <= 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ }
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterOldQueryFails) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterQueryStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
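+  // Every timer read from the fake clock reports 5ms, so all latency fields
+  // in the expected query stats below are 5.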
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(2)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(3)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+
+ DocumentProto document_four = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(4)
+ .AddInt64Property("price", 15)
+ .Build();
+ ASSERT_THAT(icing.Put(document_four).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.add_namespace_filters("namespace");
+ search_spec.add_schema_type_filters(document_one.schema());
+ search_spec.set_query("price < 20");
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(5);
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ SearchResultProto results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_four));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+
+ // Check the stats
+ // TODO(b/305098009): deprecate search-related flat fields in query_stats.
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(10);
+ exp_stats.set_num_terms(0);
+ exp_stats.set_num_namespaces_filtered(1);
+ exp_stats.set_num_schema_types_filtered(1);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(5);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_documents_scored(2);
+ exp_stats.set_num_results_with_snippets(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ exp_stats.set_lock_acquisition_latency_ms(5);
+ exp_stats.set_num_joined_results_returned_current_page(0);
+
+ QueryStatsProto::SearchStats* exp_parent_search_stats =
+ exp_stats.mutable_parent_search_stats();
+ exp_parent_search_stats->set_query_length(10);
+ exp_parent_search_stats->set_num_terms(0);
+ exp_parent_search_stats->set_num_namespaces_filtered(1);
+ exp_parent_search_stats->set_num_schema_types_filtered(1);
+ exp_parent_search_stats->set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_parent_search_stats->set_is_numeric_query(true);
+ exp_parent_search_stats->set_num_documents_scored(2);
+ exp_parent_search_stats->set_parse_query_latency_ms(5);
+ exp_parent_search_stats->set_scoring_latency_ms(5);
+ exp_parent_search_stats->set_num_fetched_hits_lite_index(0);
+ exp_parent_search_stats->set_num_fetched_hits_main_index(0);
+  // Since we inspect a single bucket for "price" in the integer index and it
+  // contains 3 hits, we fetch all 3 hits (but filter one of them out).
+ exp_parent_search_stats->set_num_fetched_hits_integer_index(3);
+
+ EXPECT_THAT(results.query_stats(), EqualsProto(exp_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, BarisNormalizationTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "Barış")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "ıbar")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec;
+ ResultSpecProto result_spec;
+
+ SearchResultProto exp_results;
+ exp_results.mutable_status()->set_code(StatusProto::OK);
+ *exp_results.add_results()->mutable_document() = document;
+
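+  // The Turkish and ASCII spellings below should all normalize to match the
+  // document containing "Barış".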
+ search_spec.set_query("barış");
+ SearchResultProto results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+
+ search_spec.set_query("barıs");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+
+ search_spec.set_query("baris");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+
+ SearchResultProto exp_results2;
+ exp_results2.mutable_status()->set_code(StatusProto::OK);
+ *exp_results2.add_results()->mutable_document() = document_two;
+ search_spec.set_query("ı");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, LatinSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ constexpr std::string_view kLatin = "test ḞÖÖḸĬŞĤ test";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kLatin)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+
+  // Check the submatch. The query term is "foo", so the submatch within the
+  // full token "ḞÖÖḸĬŞĤ" is "ḞÖÖ".
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ std::string_view match =
+ kLatin.substr(match_proto.exact_match_byte_position(),
+ match_proto.submatch_byte_length());
+ ASSERT_THAT(match, Eq("ḞÖÖ"));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ DocumentStoreNamespaceIdFingerprintCompatible) {
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+
+  // Initialize and add some documents with
+  // document_store_namespace_id_fingerprint set to false.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 3 documents
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ }
+
+  // Reinitialize with document_store_namespace_id_fingerprint set to true,
+ // and test that we are still able to read/query docs.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document3));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document1));
+ }
+
+  // Reinitialize with document_store_namespace_id_fingerprint set to false,
+ // and test that we are still able to read/query docs.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_document_store_namespace_id_fingerprint(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ ASSERT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document3));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document1));
+ }
+}
+
+TEST_P(IcingSearchEngineSearchTest, HasPropertyQuery) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP()
+ << "The hasProperty() function is only supported in advanced query.";
+ }
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Value")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("score")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create a document with every property.
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(true);
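+  // hasProperty() is served from property-existence metadata hits, so this
+  // option must be enabled at indexing time; the next test covers the
+  // disabled case.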
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Get all documents that have "body".
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ search_spec.set_query("hasProperty(\"body\")");
+ SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+
+ // Get all documents that have "timestamp".
+ search_spec.set_query("hasProperty(\"timestamp\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
+
+ // Get all documents that have "score".
+ search_spec.set_query("hasProperty(\"score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
+ EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ HasPropertyQueryDoesNotWorkWithoutMetadataHits) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP()
+ << "The hasProperty() function is only supported in advanced query.";
+ }
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Value")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("score")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Create a document with every property.
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("timestamp", 123)
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema("Value")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("body", "foo")
+ .AddDoubleProperty("score", 456.789)
+ .Build();
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(false);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Check that none of the following hasProperty queries can return any
+ // results.
+ //
+ // Get all documents that have "body".
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ search_spec.set_query("hasProperty(\"body\")");
+ SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ // Get all documents that have "timestamp".
+ search_spec.set_query("hasProperty(\"timestamp\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ // Get all documents that have "score".
+ search_spec.set_query("hasProperty(\"score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, HasPropertyQueryNestedDocument) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP()
+ << "The hasProperty() function is only supported in advanced query.";
+ }
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Value")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("score")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TreeNode")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("value")
+ .SetDataTypeDocument(
+ "Value", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+  // Create a complex nested root document with the following property paths:
+ // - name
+ // - value
+ // - value.body
+ // - value.score
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema("TreeNode")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "root")
+ .AddDocumentProperty("value", DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema("Value")
+ .AddStringProperty("body", "foo")
+ .AddDoubleProperty("score", 456.789)
+ .Build())
+ .Build();
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_build_property_existence_metadata_hits(true);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document can be found by `hasProperty("name")`.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ search_spec.set_query("hasProperty(\"name\")");
+ SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+ // Check that the document can be found by `hasProperty("value")`.
+ search_spec.set_query("hasProperty(\"value\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+ // Check that the document can be found by `hasProperty("value.body")`.
+ search_spec.set_query("hasProperty(\"value.body\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+ // Check that the document can be found by `hasProperty("value.score")`.
+ search_spec.set_query("hasProperty(\"value.score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document));
+
+ // Check that the document can NOT be found by `hasProperty("body")`.
+ search_spec.set_query("hasProperty(\"body\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ // Check that the document can NOT be found by `hasProperty("score")`.
+ search_spec.set_query("hasProperty(\"score\")");
+ results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), IsEmpty());
+}
+
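+// Run every test in this suite against both the raw query and the advanced
+// query search types.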
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineSearchTest, IcingSearchEngineSearchTest,
+ testing::Values(
+ SearchSpecProto::SearchType::ICING_RAW_QUERY,
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY));
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_suggest_test.cc b/icing/icing-search-engine_suggest_test.cc
new file mode 100644
index 0000000..b3aeafc
--- /dev/null
+++ b/icing/icing-search-engine_suggest_test.cc
@@ -0,0 +1,1601 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAre;
+
+// For mocking purposes, we allow tests to provide a custom Filesystem,
+// IcingFilesystem, and Clock.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+
+// This test suite is meant to cover all tests relating to
+// IcingSearchEngine::SearchSuggestions.
+class IcingSearchEngineSuggestTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+ Filesystem filesystem_;
+};
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());
+ return icing_options;
+}
+
+SchemaProto CreatePersonAndEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+}
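+// Note: the Email.sender property above is declared with
+// index_nested_properties=true, so terms inside a nested Person document are
+// indexed under Email's "sender.name" and "sender.emailAddress" sections.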
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+  // Create and insert 6 documents so that termSix appears in 6 documents,
+  // termFive in 5, termFour in 4, termThree in 3, termTwo in 2 and termOne
+  // in 1.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty(
+ "subject", "termOne termTwo termThree termFour termFive termSix")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject",
+ "termTwo termThree termFour termFive termSix")
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termThree termFour termFive termSix")
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri4")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termFour termFive termSix")
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri5")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termFive termSix")
+ .Build();
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace", "uri6")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termSix")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+  // Query all suggestions; they are returned ranked by document count. Terms
+  // are normalized to lowercase, hence the lowercase suggestions.
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+  ASSERT_THAT(response.suggestions().at(0).query(), Eq("termsix"));
+  ASSERT_THAT(response.suggestions().at(1).query(), Eq("termfive"));
+  ASSERT_THAT(response.suggestions().at(2).query(), Eq("termfour"));
+  ASSERT_THAT(response.suggestions().at(3).query(), Eq("termthree"));
+  ASSERT_THAT(response.suggestions().at(4).query(), Eq("termtwo"));
+  ASSERT_THAT(response.suggestions().at(5).query(), Eq("termone"));
+
+  // Query only the first three suggestions; they are returned in the same
+  // ranked order.
+ suggestion_spec.set_num_to_return(3);
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+  ASSERT_THAT(response.suggestions().at(0).query(), Eq("termsix"));
+  ASSERT_THAT(response.suggestions().at(1).query(), Eq("termfive"));
+  ASSERT_THAT(response.suggestions().at(2).query(), Eq("termfour"));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInOneNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has 2 results.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInMultipleNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+  // namespace2 and namespace3 together have 2 results.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace2");
+ suggestion_spec.add_namespace_filters("namespace3");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_NamespaceNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Search in the non-existent namespace3.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace3");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::OK));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+  // Index 4 documents:
+  // namespace1 has 2 hits for termone;
+  // namespace2 has 2 hits for termtwo and 1 hit for termone.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termone")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termone")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termone termtwo")
+ .Build();
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "termtwo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionTermOne;
+ suggestionTermOne.set_query("termone");
+ SuggestionResponse::Suggestion suggestionTermTwo;
+ suggestionTermTwo.set_query("termtwo");
+
+  // Only search suggestions for namespace2. The correct order should be
+  // {"termtwo", "termone"}. If namespace1 were not filtered out when
+  // calculating the score, the order would be {"termone", "termtwo"}.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.add_namespace_filters("namespace2");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ ElementsAre(EqualsProto(suggestionTermTwo),
+ EqualsProto(suggestionTermOne)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_DeletionTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has this suggestion
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // namespace2 has this suggestion
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+  // Delete the document from namespace1.
+ EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk());
+
+  // Now namespace1 will return no suggestions.
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace1");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+
+  // namespace2 still has this suggestion, which proves that namespace1 finds
+  // nothing because the suggestion is filtered out, not because it no longer
+  // exists.
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInOneDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ // Only search in namespace1,uri1
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+ namespace1_uri1->add_document_uris("uri1");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // Only search in namespace1,uri2
+ suggestion_spec.clear_document_uri_filters();
+ NamespaceDocumentUriGroup* namespace1_uri2 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri2->set_namespace_("namespace1");
+ namespace1_uri2->add_document_uris("uri2");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInMultipleDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+  // Only search the documents namespace1,uri1 and namespace1,uri2.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1_uri2 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1_uri2->set_namespace_("namespace1");
+ namespace1_uri1_uri2->add_document_uris("uri1");
+ namespace1_uri1_uri2->add_document_uris("uri2");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool),
+ EqualsProto(suggestionFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+  // Only search the document namespace1,uri1 and all documents under namespace2.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.add_namespace_filters("namespace2");
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+ namespace1_uri1->add_document_uris("uri1");
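+  // The two filters combine: namespace1 is further restricted to uri1 by the
+  // document uri filter, while namespace2 has no uri group and contributes
+  // all of its documents.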
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool),
+ EqualsProto(suggestionFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_DocumentIdDoesntExist) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Search for a non-existent document id: namespace3,uri3.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_namespace_filters("namespace3");
+ NamespaceDocumentUriGroup* namespace3_uri3 =
+ suggestion_spec.add_document_uri_filters();
+ namespace3_uri3->set_namespace_("namespace3");
+ namespace3_uri3->add_document_uris("uri3");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Search for the document namespace1,uri1 while the namespace filter only
+  // allows namespace2.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
+ namespace1_uri1->add_document_uris("uri1");
+ suggestion_spec.add_namespace_filters("namespace2");
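+  // A document uri filter whose namespace is absent from the namespace
+  // filter is inconsistent, so the request should be rejected.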
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_EmptyDocumentIdInNamespace) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+  // Give an empty document uri list for namespace1.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ NamespaceDocumentUriGroup* namespace1_uri1 =
+ suggestion_spec.add_document_uri_filters();
+ namespace1_uri1->set_namespace_("namespace1");
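+  // A document uri group that names a namespace but no uris is invalid.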
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_SchemaTypeNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .AddStringProperty("emailAddress", "fo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Only search in subject.
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Email");
+ mask->add_paths("subject");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // Search in subject and sender.name
+ suggestion_spec.clear_type_property_filters();
+ mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Email");
+ mask->add_paths("subject");
+ mask->add_paths("sender.name");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_NestedPropertyReturnNothing) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Only search in Person.name.
+ suggestion_spec.add_schema_type_filters("Person");
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Person");
+ mask->add_paths("name");
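+  // The only Person here exists nested inside the Email document, so
+  // filtering on the Person schema type should match nothing.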
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyFilterAndSchemaFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFo;
+ suggestionFo.set_query("fo");
+
+ // Search in sender.name of Email and everything in Message.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+ suggestion_spec.add_schema_type_filters("Message");
+ TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
+ mask1->set_schema_type("Email");
+ mask1->add_paths("sender.name");
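+  // Message has no property mask of its own, so all of its properties are
+  // searched.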
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+  // Search in sender.name of Email, but the schema type filter only allows
+  // Message.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Message");
+ TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
+ mask1->set_schema_type("Email");
+ mask1->add_paths("sender.name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_OrderByTermFrequency) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty(
+ "body", "termthree termthree termthree termtwo termtwo termone")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+  // Rank suggestions by term frequency within the single document.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::EXACT_ONLY);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY);
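+  // TERM_FREQUENCY ranks by how often each term occurs in the matched
+  // documents: termthree occurs 3 times, termtwo twice and termone once.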
+
+ SuggestionResponse::Suggestion suggestionTermOne;
+ suggestionTermOne.set_query("termone");
+ SuggestionResponse::Suggestion suggestionTermTwo;
+ suggestionTermTwo.set_query("termtwo");
+ SuggestionResponse::Suggestion suggestionTermThree;
+ suggestionTermThree.set_query("termthree");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ ElementsAre(EqualsProto(suggestionTermThree),
+ EqualsProto(suggestionTermTwo),
+ EqualsProto(suggestionTermOne)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_ExpiredTest) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(400);
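+    // At time 400 neither document has expired: document1 expires at
+    // 100 + 500 = 600 and document2 at 100 + 1000 = 1100.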
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has this suggestion
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // namespace2 has this suggestion
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+ }
+  // Reinitialize with the fake clock moved forward so that document1 has
+  // expired.
+ {
+ // Time needs to be past document1 creation time (100) + ttl (500) for it
+ // to count as "expired". document2 is not expired since its ttl is 1000.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(800);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+    // Now namespace1 will return no suggestions.
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace1");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+
+ // namespace2 still has this suggestion
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+ }
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_EmptyPrefix) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_NonPositiveNumToReturn) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("prefix");
+ suggestion_spec.set_num_to_return(0);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_MultipleTerms_And) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionBarFo;
+ suggestionBarFo.set_query("bar fo");
+
+  // Search for "bar AND f"; only document1 should match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarFo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_MultipleTerms_Or) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "cat foo")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .Build();
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionBarCatFo;
+ suggestionBarCatFo.set_query("bar OR cat fo");
+ SuggestionResponse::Suggestion suggestionBarCatFoo;
+ suggestionBarCatFoo.set_query("bar OR cat foo");
+
+  // Search for "(bar OR cat) AND f"; both document1 ("bar fo") and document2
+  // ("cat foo") could match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar OR cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarCatFo),
+ EqualsProto(suggestionBarCatFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyRestriction) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .AddStringProperty("emailAddress", "fo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Add property restriction, only search for subject.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("subject:f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse::Suggestion suggestionSubjectFool;
+ suggestionSubjectFool.set_query("subject:fool");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionSubjectFool)));
+
+ // Add property restriction, only search for nested sender.name
+ suggestion_spec.set_prefix("sender.name:f");
+ SuggestionResponse::Suggestion suggestionSenderNameFoo;
+ suggestionSenderNameFoo.set_query("sender.name:foo");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionSenderNameFoo)));
+
+  // Add property restriction, only search in a non-existent section.
+ suggestion_spec.set_prefix("none:f");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_AndOperatorPlusPropertyRestriction) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo") // "bar fo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+          .AddStringProperty("subject", "bar cat foo")  // "bar cat foo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool") // "fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Search for "bar AND subject:f"
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar subject:f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse::Suggestion suggestionBarSubjectFo;
+ suggestionBarSubjectFo.set_query("bar subject:fo");
+ SuggestionResponse::Suggestion suggestionBarSubjectFoo;
+ suggestionBarSubjectFoo.set_query("bar subject:foo");
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarSubjectFo),
+ EqualsProto(suggestionBarSubjectFoo)));
+
+ // Search for "bar AND cat AND subject:f"
+ suggestion_spec.set_prefix("bar cat subject:f");
+ SuggestionResponse::Suggestion suggestionBarCatSubjectFoo;
+ suggestionBarCatSubjectFoo.set_query("bar cat subject:foo");
+
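+ // Only document2 contains both "bar" and "cat", so its subject term "foo" is
+ // the only valid completion.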
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionBarCatSubjectFoo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_InvalidPrefixTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar fo") // "bar fo"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "bar cat foo") // "bar cat fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool") // "fool"
+ .AddStringProperty("body", "fool")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Search for "f OR"
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f OR");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
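+ // A prefix ending in a dangling OR is not a valid query. The ICING_RAW_QUERY
+ // search type tolerates it and simply returns no suggestions, while other
+ // search types reject it with INVALID_ARGUMENT.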
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // TODO(b/208654892): Update hyphen handling so that '-' is only treated as a
+ // hyphen within a TEXT token (rather than as a MINUS token) when it is
+ // surrounded on both sides by TEXT, not merely preceded by TEXT.
+ // Search for "f-"
+ suggestion_spec.set_prefix("f-");
+ response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+
+ // Search for "f:"
+ suggestion_spec.set_prefix("f:");
+ response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // Search for "OR OR - :"
+ suggestion_spec.set_prefix("OR OR - :");
+ response = icing.SearchSuggestions(suggestion_spec);
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ EXPECT_THAT(response.status(), ProtoIsOk());
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ } else {
+ EXPECT_THAT(response.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(response.suggestions(), IsEmpty());
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index b0946c9..ddb83a8 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -26,19 +26,30 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/snippet-helpers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -48,13 +59,11 @@ namespace lib {
namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::_;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
-using ::testing::Lt;
-using ::testing::Matcher;
using ::testing::Return;
using ::testing::SizeIs;
using ::testing::StrEq;
@@ -65,19 +74,33 @@ class TestIcingSearchEngine : public IcingSearchEngine {
public:
TestIcingSearchEngine(const IcingSearchEngineOptions& options,
std::unique_ptr<const Filesystem> filesystem,
- std::unique_ptr<FakeClock> clock)
- : IcingSearchEngine(options, std::move(filesystem), std::move(clock)) {}
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
};
std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+// This test suite is meant to cover the IcingSearchEngine APIs that are not
+// specifically covered by the other IcingSearchEngine*Test suites.
class IcingSearchEngineTest : public testing::Test {
protected:
void SetUp() override {
- // File generated via icu_data_file rule in //icing/BUILD.
- std::string icu_data_file_path =
- GetTestFilePath("icing/icu.dat");
- ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified CFString or reverse-JNI segmentation (i.e. not ICU),
+ // then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
}
@@ -91,21 +114,9 @@ class IcingSearchEngineTest : public testing::Test {
Filesystem filesystem_;
};
-constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;
-
// Non-zero value so we don't override it to be the current time
constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
-std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
-
-std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
-
-std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
-
-std::string GetHeaderFilename() {
- return GetTestBaseDir() + "/icing_search_engine_header";
-}
-
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
@@ -122,42 +133,48 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
}
SchemaProto CreateMessageSchema() {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- return schema;
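+ // The same "Message" schema as before, now expressed with the SchemaBuilder
+ // helper: a single required, prefix-indexed "body" string property.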
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
}
-SchemaProto CreateEmailSchema() {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- auto* subj = type->add_properties();
- subj->set_property_name("subject");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- subj->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- subj->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- return schema;
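+// Builds a schema with a "Person" type and an "Email" type whose "sender"
+// property is a nested Person document with its nested properties indexed.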
+SchemaProto CreatePersonAndEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
}
ScoringSpecProto GetDefaultScoringSpec() {
@@ -166,634 +183,38 @@ ScoringSpecProto GetDefaultScoringSpec() {
return scoring_spec;
}
-TEST_F(IcingSearchEngineTest, SimpleInitialization) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(DocumentProto(document)).status().code(),
- Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest, InitializingAgainSavesNonPersistedData) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document;
-
- ASSERT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
-
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, NegativeMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(-1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(0);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // One is fine, if a bit weird. It just means that the lite index will be
- // smaller and will request a merge any time content is added to it.
- options.set_index_merge_size(1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest,
- NegativeMaxTokensPerDocSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_tokens_per_doc(-1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMaxTokensPerDocSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_tokens_per_doc(0);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, GoodMaxTokensPerDocSizeReturnsOk) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // INT_MAX is valid - it just means that we shouldn't limit the number of
- // tokens per document. It would be pretty inconceivable that anyone would
- // produce such a document - the text being indexed alone would take up at
- // least ~4.3 GiB! - and the document would be rejected before indexing
- // for exceeding max_document_size, but there's no reason to explicitly
- // bar it.
- options.set_max_tokens_per_doc(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_token_length(-1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMaxTokenLenReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_token_length(0);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // A length of 1 is allowed - even though it would be strange to want
- // this.
- options.set_max_token_length(1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
-
- // "message" should have been truncated to "m"
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- // The indexed tokens were truncated to length of 1, so "m" will match
- search_spec.set_query("m");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // The query token is also truncated to length of 1, so "me"->"m" matches "m"
- search_spec.set_query("me");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // The query token is still truncated to length of 1, so "massage"->"m"
- // matches "m"
- search_spec.set_query("massage");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // Set token length to max. This is allowed (it just means never to
- // truncate tokens). However, this does mean that tokens that exceed the
- // size of the lexicon will cause indexing to fail.
- options.set_max_token_length(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Add a document that just barely fits under the max document limit.
- // This will still fail to index because we won't actually have enough
- // room in the lexicon to fit this content.
- std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", std::move(enormous_string))
- .Build();
- EXPECT_THAT(icing.Put(document).status().code(),
- Eq(StatusProto::OUT_OF_SPACE));
-
- SearchSpecProto search_spec;
- search_spec.set_query("p");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails DocumentStore::Create()
- ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_))
- .WillByDefault(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<FakeClock>());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status().code(),
- Eq(StatusProto::INTERNAL));
- EXPECT_THAT(initialize_result_proto.status().message(),
- HasSubstr("Could not create directory"));
-}
-
-TEST_F(IcingSearchEngineTest,
- CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
- // Create a type config with a circular reference.
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("Message");
-
- auto* body = type->add_properties();
- body->set_property_name("recipient");
- body->set_schema_type("Person");
- body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- type = schema.add_types();
- type->set_schema_type("Person");
-
- body = type->add_properties();
- body->set_property_name("recipient");
- body->set_schema_type("Message");
- body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, PutWithoutSchemaFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
-}
-
-TEST_F(IcingSearchEngineTest, FailToReadSchema) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- {
- // Successfully initialize and set a schema
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- }
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
-
- // This fails FileBackedProto::Read() when we try to check the schema we
- // had previously set
- ON_CALL(*mock_filesystem,
- OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
- .WillByDefault(Return(-1));
-
- TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
- std::make_unique<FakeClock>());
-
- InitializeResultProto initialize_result_proto = test_icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status().code(),
- Eq(StatusProto::INTERNAL));
- EXPECT_THAT(initialize_result_proto.status().message(),
- HasSubstr("Unable to open file for read"));
-}
-
-TEST_F(IcingSearchEngineTest, FailToWriteSchema) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails FileBackedProto::Write()
- ON_CALL(*mock_filesystem,
- OpenForWrite(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
- .WillByDefault(Return(-1));
-
- TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
- std::make_unique<FakeClock>());
-
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- SetSchemaResultProto set_schema_result_proto =
- icing.SetSchema(CreateMessageSchema());
- EXPECT_THAT(set_schema_result_proto.status().code(),
- Eq(StatusProto::INTERNAL));
- EXPECT_THAT(set_schema_result_proto.status().message(),
- HasSubstr("Unable to open file for write"));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchema) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- auto message_document = CreateMessageDocument("namespace", "uri");
-
- auto schema_with_message = CreateMessageSchema();
-
- SchemaProto schema_with_email;
- SchemaTypeConfigProto* type = schema_with_email.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- SchemaProto schema_with_email_and_message = schema_with_email;
- type = schema_with_email_and_message.add_types();
- type->set_schema_type("Message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Create an arbitrary invalid schema
- SchemaProto invalid_schema;
- SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
- empty_type->set_schema_type("");
-
- // Make sure we can't set invalid schemas
- EXPECT_THAT(icing.SetSchema(invalid_schema).status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
-
- // Can add a document of a set schema
- EXPECT_THAT(icing.SetSchema(schema_with_message).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
-
- // Schema with Email doesn't have Message, so it would result in incompatible
- // data
- EXPECT_THAT(icing.SetSchema(schema_with_email).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
-
- // Can expand the set of schema types and add a document of a new
- // schema type
- EXPECT_THAT(icing.SetSchema(SchemaProto(schema_with_email_and_message))
- .status()
- .code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
-
- // Can't add a document whose schema isn't set
- auto photo_document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Photo")
- .AddStringProperty("creator", "icing")
- .Build();
- PutResultProto put_result_proto = icing.Put(photo_document);
- EXPECT_THAT(put_result_proto.status().code(), Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(put_result_proto.status().message(),
- HasSubstr("'Photo' not found"));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
- schema_with_no_indexed_property.mutable_types(0)
- ->mutable_properties(0)
- ->clear_indexing_config();
-
- EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status().code(),
- Eq(StatusProto::OK));
- // Nothing will be indexed and Search() won't return anything.
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
-
- SchemaProto schema_with_indexed_property = CreateMessageSchema();
- // Index restoration should be triggered here because the new schema requires
- // more properties to be indexed.
- EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status().code(),
- Eq(StatusProto::OK));
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SchemaProto schema_with_optional_subject;
- auto type = schema_with_optional_subject.add_types();
- type->set_schema_type("email");
-
- // Add an OPTIONAL property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto email_document_without_subject =
- DocumentBuilder()
- .SetKey("namespace", "without_subject")
- .SetSchema("email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto email_document_with_subject =
- DocumentBuilder()
- .SetKey("namespace", "with_subject")
- .SetSchema("email")
- .AddStringProperty("subject", "foo")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(email_document_without_subject).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(email_document_with_subject).status().code(),
- Eq(StatusProto::OK));
-
- SchemaProto schema_with_required_subject;
- type = schema_with_required_subject.add_types();
- type->set_schema_type("email");
-
- // Add a REQUIRED property
- property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Can't set the schema since it's incompatible
- SetSchemaResultProto expected_set_schema_result_proto;
- expected_set_schema_result_proto.mutable_status()->set_code(
- StatusProto::FAILED_PRECONDITION);
- expected_set_schema_result_proto.mutable_status()->set_message(
- "Schema is incompatible.");
- expected_set_schema_result_proto.add_incompatible_schema_types("email");
-
- EXPECT_THAT(icing.SetSchema(schema_with_required_subject),
- EqualsProto(expected_set_schema_result_proto));
-
- // Force set it
- expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result_proto.mutable_status()->clear_message();
- EXPECT_THAT(icing.SetSchema(schema_with_required_subject,
- /*ignore_errors_and_delete_documents=*/true),
- EqualsProto(expected_set_schema_result_proto));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document_with_subject;
-
- EXPECT_THAT(icing.Get("namespace", "with_subject"),
- EqualsProto(expected_get_result_proto));
-
- // The document without a subject got deleted because it failed validation
- // against the new schema
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, without_subject) not found.");
- expected_get_result_proto.clear_document();
-
- EXPECT_THAT(icing.Get("namespace", "without_subject"),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
-
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
-
- DocumentProto email_document =
- DocumentBuilder()
- .SetKey("namespace", "email_uri")
- .SetSchema("email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto message_document =
- DocumentBuilder()
- .SetKey("namespace", "message_uri")
- .SetSchema("message")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(email_document).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
-
- // Clear the schema and only add the "email" type, essentially deleting the
- // "message" type
- SchemaProto new_schema;
- type = new_schema.add_types();
- type->set_schema_type("email");
-
- // Can't set the schema since it's incompatible
- SetSchemaResultProto expected_result;
- expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
- expected_result.mutable_status()->set_message("Schema is incompatible.");
- expected_result.add_deleted_schema_types("message");
-
- EXPECT_THAT(icing.SetSchema(new_schema), EqualsProto(expected_result));
-
- // Force set it
- expected_result.mutable_status()->set_code(StatusProto::OK);
- expected_result.mutable_status()->clear_message();
- EXPECT_THAT(icing.SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true),
- EqualsProto(expected_result));
-
- // "email" document is still there
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document;
-
- EXPECT_THAT(icing.Get("namespace", "email_uri"),
- EqualsProto(expected_get_result_proto));
-
- // "message" document got deleted
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, message_uri) not found.");
- expected_get_result_proto.clear_document();
-
- EXPECT_THAT(icing.Get("namespace", "message_uri"),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- EXPECT_THAT(icing.GetSchema().status().code(), Eq(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- GetSchemaResultProto expected_get_schema_result_proto;
- expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
- EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaTypeFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- GetSchemaTypeResultProto get_schema_type_result_proto =
- icing.GetSchemaType("nonexistent_schema");
- EXPECT_THAT(get_schema_type_result_proto.status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(get_schema_type_result_proto.status().message(),
- HasSubstr("Schema not set"));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaTypeOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- GetSchemaTypeResultProto expected_get_schema_type_result_proto;
- expected_get_schema_type_result_proto.mutable_status()->set_code(
- StatusProto::OK);
- *expected_get_schema_type_result_proto.mutable_schema_type_config() =
- CreateMessageSchema().types(0);
- EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()),
- EqualsProto(expected_get_schema_type_result_proto));
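+// Builds a UsageReport for the given document key with the provided usage
+// timestamp and usage type.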
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
}
TEST_F(IcingSearchEngineTest, GetDocument) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Simple put and get
- ASSERT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_get_result_proto.mutable_document() =
CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
+ ASSERT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
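+ // Get() now takes a GetResultSpecProto; the default instance returns the
+ // whole document with no projection applied.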
// Put an invalid document
PutResultProto put_result_proto = icing.Put(DocumentProto());
- EXPECT_THAT(put_result_proto.status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
EXPECT_THAT(put_result_proto.status().message(),
HasSubstr("'namespace' is empty"));
@@ -802,1785 +223,624 @@ TEST_F(IcingSearchEngineTest, GetDocument) {
expected_get_result_proto.mutable_status()->set_message(
"Document (wrong, uri) not found.");
expected_get_result_proto.clear_document();
- ASSERT_THAT(icing.Get("wrong", "uri"),
+ ASSERT_THAT(icing.Get("wrong", "uri", GetResultSpecProto::default_instance()),
EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(GetMatch(results.results(0).document(),
- results.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(
- GetWindow(results.results(0).document(), results.results(0).snippet(),
- "body", /*snippet_index=*/0),
- Eq("message body"));
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(
- GetMatch(results.results(1).document(), results.results(1).snippet(),
- "body", /*snippet_index=*/0),
- IsEmpty());
- EXPECT_THAT(
- GetWindow(results.results(1).document(), results.results(1).snippet(),
- "body", /*snippet_index=*/0),
- IsEmpty());
-
- search_spec.set_query("foo");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document_two;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.status().code(), Eq(StatusProto::OK));
- // The token is a random number so we don't verify it.
- expected_search_result_proto.set_next_page_token(
- search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(0);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(-5);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(
- StatusProto::INVALID_ARGUMENT);
- expected_search_result_proto.mutable_status()->set_message(
- "ResultSpecProto.num_per_page cannot be negative.");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- {
- // Set the schema up beforehand.
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- // Schema will be persisted to disk when icing goes out of scope.
- }
-
- {
- // Ensure that icing initializes the schema and section_manager
- // properly from the pre-existing file.
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
- // The index and document store will be persisted to disk when icing goes
- // out of scope.
- }
-
- {
- // Ensure that the index is brought back up without problems and we
- // can query for the content that we expect.
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- search_spec.set_query("foo");
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Empty result, no next-page token
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
-
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document4).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document5).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
-
- // Searches and gets the first page, 2 results
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify it.
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
-
- // Second page, 2 results
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-
- // Third page, 1 result
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-
- // No more results
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document4).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document5).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
-
- // Searches and gets the first page, 2 results
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify it.
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
-
- // Second page, 2 results
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-
- // Third page, 1 result
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-
- // No more results
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document4).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document5).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
- result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
-
- // Searches and gets the first page, 2 results with 2 snippets
- SearchResultProto search_result =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- ASSERT_THAT(search_result.status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document5));
- EXPECT_THAT(GetMatch(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
- EXPECT_THAT(search_result.results(1).document(), EqualsProto(document4));
- EXPECT_THAT(GetMatch(search_result.results(1).document(),
- search_result.results(1).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(1).document(),
- search_result.results(1).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
-
- // Second page, 2 results with 1 snippet
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document3));
- EXPECT_THAT(GetMatch(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
- EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
- EXPECT_THAT(search_result.results(1).snippet().entries_size(), Eq(0));
-
- // Third page, 1 result with 0 snippets
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(search_result.results(), SizeIs(1));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
- EXPECT_THAT(search_result.results(0).snippet().entries_size(), Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Searches and gets the first page, 1 result
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify its exact value
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
- // Now document1 is still to be fetched.
-
- // Invalidates token
- icing.InvalidateNextPageToken(next_page_token);
-
- // Tries to fetch the second page, no results since it's invalidated
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- AllPageTokensShouldBeInvalidatedAfterOptimization) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionEmpty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Searches and gets the first page, 1 result
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify its exact value
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
- // Now document1 is still to be fetched.
-
- OptimizeResultProto optimize_result_proto;
- optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
- optimize_result_proto.mutable_status()->set_message("");
- ASSERT_THAT(icing.Optimize(), EqualsProto(optimize_result_proto));
-
- // Tries to fetch the second page, no results since all tokens have been
- // invalidated during Optimize()
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ GetResultSpecProto result_spec;
+ TypePropertyMask* mask = result_spec.add_type_property_masks();
+ mask->set_schema_type(document.schema());
+ mask->add_paths("");
GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- {
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
-
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status().code(),
- Eq(StatusProto::OK));
- const std::string document_log_path =
- icing_options.base_dir() + "/document_dir/document_log";
- int64_t document_log_size_before =
- filesystem()->GetFileSize(document_log_path.c_str());
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
- int64_t document_log_size_after =
- filesystem()->GetFileSize(document_log_path.c_str());
-
- // Validates that document can't be found right after Optimize()
- EXPECT_THAT(icing.Get("namespace", "uri1"),
- EqualsProto(expected_get_result_proto));
- // Validates that document is actually removed from document log
- EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(icing_options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri1"),
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+ expected_get_result_proto.mutable_document()->clear_properties();
+ ASSERT_THAT(icing.Get("namespace", "uri", result_spec),
EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Create a tmp dir that will be used in Optimize() to swap files; this
- // validates that any tmp dirs will be deleted before use.
- const std::string tmp_dir =
- icing_options.base_dir() + "/document_dir_optimize_tmp";
-
- const std::string tmp_file = tmp_dir + "/file";
- ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str()));
- ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
- fd.reset();
-
- EXPECT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
-
- EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
- EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, GetOptimizeInfoHasCorrectStats) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
+TEST_F(IcingSearchEngineTest, GetDocumentWildCardProjectionEmpty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(1000);
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::move(fake_clock));
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- // Just initialized, nothing is optimizable yet.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
-
- // Only have active documents, nothing is optimizable yet.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
-
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status().code(),
- Eq(StatusProto::OK));
-
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
- int64_t first_estimated_optimizable_bytes =
- optimize_info.estimated_optimizable_bytes();
-
- // Add a second document, but it'll be expired since the time (1000) is
- // greater than the document's creation timestamp (100) + the document's ttl
- // (500)
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
- Gt(first_estimated_optimizable_bytes));
-
- // Optimize
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
-
- // Nothing is optimizable now that everything has been optimized away.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ GetResultSpecProto result_spec;
+ TypePropertyMask* mask = result_spec.add_type_property_masks();
+ mask->set_schema_type("*");
+ mask->add_paths("");
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
-
- // Validates that Get() and Put() are good right after Optimize()
- EXPECT_THAT(icing.Get("namespace", "uri1"),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri1"),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace", "uri2"),
+ *expected_get_result_proto.mutable_document() = document;
+ expected_get_result_proto.mutable_document()->clear_properties();
+ ASSERT_THAT(icing.Get("namespace", "uri", result_spec),
EqualsProto(expected_get_result_proto));
-
- EXPECT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
}
-TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
-
- // Validates that Delete() works right after Optimize()
- EXPECT_THAT(icing.Delete("namespace", "uri1").status().code(),
- Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(
- StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- EXPECT_THAT(icing.Get("namespace", "uri1"),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace", "uri2"),
- EqualsProto(expected_get_result_proto));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Delete("namespace", "uri2").status().code(),
- Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- EXPECT_THAT(icing.Get("namespace", "uri1"),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri2) not found.");
- EXPECT_THAT(icing.Get("namespace", "uri2"),
- EqualsProto(expected_get_result_proto));
-}
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleFieldPaths) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
-TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
- SchemaProto schema;
- // Add an email type
- auto type = schema.add_types();
- type->set_schema_type("email");
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- // Add a message type
- type = schema.add_types();
- type->set_schema_type("message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
+ // 1. Add an email document
+ DocumentProto document =
DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("email")
- .AddStringProperty("subject", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(icing.Get("namespace1", "uri1"),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace2", "uri2"),
- EqualsProto(expected_get_result_proto));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Delete the first type. The first doc should be irretrievable. The
- // second should still be present.
- EXPECT_THAT(icing.DeleteBySchemaType("message").status().code(),
- Eq(StatusProto::OK));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(icing.Get("namespace1", "uri1"),
- EqualsProto(expected_get_result_proto));
+ GetResultSpecProto result_spec;
+ TypePropertyMask* mask = result_spec.add_type_property_masks();
+ mask->set_schema_type("Email");
+ mask->add_paths("sender.name");
+ mask->add_paths("subject");
+ // 2. Verify that the returned result only contains the 'sender.name'
+ // property and the 'subject' property.
+ GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace2", "uri2"),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
.Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(icing.Get("namespace1", "uri1"),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace2", "uri2"),
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
EqualsProto(expected_get_result_proto));
-
- // Delete the first namespace. The first doc should be irretrievable. The
- // second should still be present.
- EXPECT_THAT(icing.DeleteByNamespace("namespace1").status().code(),
- Eq(StatusProto::OK));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(icing.Get("namespace1", "uri1"),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace2", "uri2"),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
- // Creates 3 test schemas
- SchemaProto schema1 = SchemaProto(CreateMessageSchema());
-
- SchemaProto schema2 = SchemaProto(schema1);
- auto new_property2 = schema2.mutable_types(0)->add_properties();
- new_property2->set_property_name("property2");
- new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property2->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property2->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- SchemaProto schema3 = SchemaProto(schema2);
- auto new_property3 = schema3.mutable_types(0)->add_properties();
- new_property3->set_property_name("property3");
- new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property3->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property3->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
-
- // Validates that SetSchema() works right after Optimize()
- EXPECT_THAT(icing.SetSchema(schema2).status().code(), Eq(StatusProto::OK));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema3).status().code(), Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) {
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
-
- // Validates that Search() works right after Optimize()
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- {
- // Initializes a normal icing to create files needed
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- }
+TEST_F(IcingSearchEngineTest, GetDocumentWildcardProjectionMultipleFieldPaths) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
- // Creates a mock filesystem in which DeleteDirectoryRecursively() always
- // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and make
- // it return ABORTED_ERROR.
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, DeleteDirectoryRecursively)
- .WillByDefault(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Optimize().status().code(), Eq(StatusProto::ABORTED));
+ // 1. Add an email document
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Now that optimization has been aborted, we verify that document-related
- // functions still work as expected.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* mask = result_spec.add_type_property_masks();
+ mask->set_schema_type("*");
+ mask->add_paths("sender.name");
+ mask->add_paths("subject");
+ // 2. Verify that the returned result only contains the 'sender.name'
+ // property and the 'subject' property.
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(icing.Get("namespace", "uri1"),
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
EqualsProto(expected_get_result_proto));
-
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
-
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
- OptimizationShouldRecoverIfFileDirectoriesAreMissing) {
- // Creates a mock filesystem in which SwapFiles() always fails and deletes the
- // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, SwapFiles)
- .WillByDefault([this](const char* one, const char* two) {
- filesystem()->DeleteDirectoryRecursively(one);
- filesystem()->DeleteDirectoryRecursively(two);
- return false;
- });
+ GetDocumentSpecificProjectionOverridesWildcardProjection) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<FakeClock>());
-
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-
- // Optimize() fails due to filesystem error
- EXPECT_THAT(icing.Optimize().status().code(),
- Eq(StatusProto::WARNING_DATA_LOSS));
-
- // Document is not found because original file directory is missing
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto new_document =
+ // 1. Add an email document
+ DocumentProto document =
DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "new body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
-
- EXPECT_THAT(icing.Put(new_document).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Searching old content returns nothing because original file directory is
- // missing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- search_spec.set_query("n");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- new_document;
-
- // Searching new content returns the new document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
- // Creates a mock filesystem in which SwapFiles() always fails and empties the
- // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, SwapFiles)
- .WillByDefault([this](const char* one, const char* two) {
- filesystem()->DeleteDirectoryRecursively(one);
- filesystem()->CreateDirectoryRecursively(one);
- filesystem()->DeleteDirectoryRecursively(two);
- filesystem()->CreateDirectoryRecursively(two);
- return false;
- });
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<FakeClock>());
-
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-
- // Optimize() fails due to filesystem error
- EXPECT_THAT(icing.Optimize().status().code(),
- Eq(StatusProto::WARNING_DATA_LOSS));
-
- // Document is not found because original files are missing
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // 2. Add type property masks for the wildcard and the specific type of the
+ // document 'Email'. The wildcard should be ignored and only the 'Email'
+ // projection should apply.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* mask = result_spec.add_type_property_masks();
+ mask->set_schema_type("*");
+ mask->add_paths("subject");
+ mask = result_spec.add_type_property_masks();
+ mask->set_schema_type("Email");
+ mask->add_paths("body");
+
+ // 3. Verify that the returned result only contains the 'body' property.
GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto new_document =
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "new body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
-
- EXPECT_THAT(icing.Put(new_document).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Searching old content returns nothing because original files are missing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- search_spec.set_query("n");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- new_document;
-
- // Searching new content returns the new document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- // Time just has to be less than the document's creation timestamp (100) + the
- // document's ttl (500)
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(400);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::move(fake_clock));
-
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
-
- // Check that the document is returned as part of search results
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Time just has to be greater than the document's creation timestamp (100) +
- // the document's ttl (500)
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(700);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::move(fake_clock));
-
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
-
- // Check that the document is not returned as part of search results
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("message");
-
- auto property = type_config->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionPolymorphism) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
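+ // Note that Artist declares Person as a parent type above; this is what
+ // should let a Person projection mask merge into Artist's mask below.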
- DocumentProto message_document =
+ // Add a person document and an artist document
+ DocumentProto document_person =
DocumentBuilder()
- .SetKey("namespace", "message_uri")
- .SetSchema("message")
- .AddStringProperty("body", "foo")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
.Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
-
- // Make sure we can search for message document
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // The message isn't indexed, so we get nothing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // With just the schema type filter, we can search for the message
- search_spec.Clear();
- search_spec.add_schema_type_filters("message");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- message_document;
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
- // will force a change in the DocumentStore's cached SchemaTypeIds
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- // Adding a new indexed property will require reindexing
- type_config = schema.add_types();
- type_config->set_schema_type("message");
-
- property = type_config->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
-
- search_spec.Clear();
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.add_schema_type_filters("message");
-
- // We can still search for the message document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
+ DocumentProto document_artist =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Meg Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .AddStringProperty("company", "aol")
+ .Build();
+ ASSERT_THAT(icing.Put(document_person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_artist).status(), ProtoIsOk());
+
+ // Add type property masks
+ GetResultSpecProto result_spec;
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // will be merged into Artist's TypePropertyMask by polymorphism, so 'name'
+ // will also appear in Artist's projection results.
+ TypePropertyMask* artist_type_property_mask =
+ result_spec.add_type_property_masks();
+ artist_type_property_mask->set_schema_type("Artist");
+ artist_type_property_mask->add_paths("emailAddress");
+
+ // Verify that the returned person document only contains the 'name' property,
+ // and the returned artist document contains both the 'name' and 'emailAddress'
+ // properties.
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(icing.Get("namespace", "uri"),
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
EqualsProto(expected_get_result_proto));
- // Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // Checks that Schema is still valid since it'll be needed to validate the document
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Change the header's magic value
- int32_t invalid_magic = 1; // Anything that's not the actual kMagic value.
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, magic),
- &invalid_magic, sizeof(invalid_magic));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(icing.Get("namespace", "uri"),
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Meg Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
EqualsProto(expected_get_result_proto));
-
- // Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // Checks that Schema is still valid since it'll be needed to validate the document
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
}
-TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleParentPolymorphism) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
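+ // Note that EmailMessage declares both Email and Message as parent types, so
+ // masks for either parent are expected to merge into its projection.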
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
+ // Add an email document and a message document
+ DocumentProto document_email =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("sender", "sender1")
+ .AddStringProperty("recipient", "recipient1")
+ .Build();
+ DocumentProto document_message = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("content", "content1")
+ .AddStringProperty("note", "note1")
+ .Build();
+ // Add an EmailMessage document
+ DocumentProto document_email_message =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("recipient", "recipient2")
+ .AddStringProperty("content", "content2")
+ .AddStringProperty("note", "note2")
+ .Build();
+ ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk());
+
+ // Add type property masks for Email and Message, both of which will apply
+ // to EmailMessage.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender");
+
+ TypePropertyMask* message_type_property_mask =
+ result_spec.add_type_property_masks();
+ message_type_property_mask->set_schema_type("Message");
+ message_type_property_mask->add_paths("content");
+
+ // Verify that
+ // - The returned email document only contains the 'sender' property.
+ // - The returned message document only contains the 'content' property.
+ // - The returned email message document contains both the 'sender' and
+ // 'content' properties.
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Change the header's checksum value
- uint32_t invalid_checksum =
- 1; // Anything that's not the actual checksum value
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, checksum),
- &invalid_checksum, sizeof(invalid_checksum));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(icing.Get("namespace", "uri"),
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("sender", "sender1")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
EqualsProto(expected_get_result_proto));
- // Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-
- // Checks that Schema is still valid since it'll be needed to validate the document
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) {
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- const std::string schema_file =
- absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
- const std::string corrupt_data = "1234";
- EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
- corrupt_data.size()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
- const std::string corrupt_data = "1234";
- EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
- corrupt_data.data(), corrupt_data.size()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2_with_additional_property =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
.SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
.SetSchema("Message")
- .AddStringProperty("additional", "content")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("content", "content1")
.Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
- {
- // Initializes folder and schema
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2_with_additional_property).status().code(),
- Eq(StatusProto::OK));
-
- // Won't get us anything because "additional" isn't marked as an indexed
- // property in the schema
- SearchSpecProto search_spec;
- search_spec.set_query("additional:content");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- {
- // This schema will change the SchemaTypeIds from the previous schema_
- // (since SchemaTypeIds are assigned based on order of the types, and this
- // new schema changes the ordering of previous types)
- SchemaProto new_schema;
- auto type = new_schema.add_types();
- type->set_schema_type("Email");
-
- type = new_schema.add_types();
- type->set_schema_type("Message");
-
- // Adding a new property changes the SectionIds (since SectionIds are
- // assigned based on alphabetical order of indexed sections, marking
- // "additional" as an indexed property will push the "body" property to a
- // different SectionId)
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir()));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
- } // Will persist new schema
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
-  // We can insert an Email document since we kept the new schema
- DocumentProto email_document =
+ *expected_get_result_proto.mutable_document() =
DocumentBuilder()
- .SetKey("namespace", "email_uri")
- .SetSchema("Email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("content", "content2")
.Build();
- EXPECT_THAT(icing.Put(email_document).status().code(), Eq(StatusProto::OK));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document;
-
- EXPECT_THAT(icing.Get("namespace", "email_uri"),
+ ASSERT_THAT(icing.Get("namespace", "uri3", result_spec),
EqualsProto(expected_get_result_proto));
-
- SearchSpecProto search_spec;
-
- // The section restrict will ensure we are using the correct, updated
- // SectionId in the Index
- search_spec.set_query("additional:content");
-
- // Schema type filter will ensure we're using the correct, updated
- // SchemaTypeId in the DocumentStore
- search_spec.add_schema_type_filters("Message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2_with_additional_property;
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionDiamondPolymorphism) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with a diamond inheritance relation.
+  //          Object
+  //         /      \
+  //     Email      Message
+  //         \      /
+  //       EmailMessage
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Object").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddParentType("Object")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddParentType("Object")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- } // This should shut down IcingSearchEngine and persist anything it needs to
+ // Add an email document and a message document
+ DocumentProto document_email =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("objectId", "object1")
+ .AddStringProperty("sender", "sender1")
+ .AddStringProperty("recipient", "recipient1")
+ .Build();
+ DocumentProto document_message = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("objectId", "object2")
+ .AddStringProperty("content", "content1")
+ .AddStringProperty("note", "note1")
+ .Build();
+  // Add an EmailMessage document
+ DocumentProto document_email_message =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("objectId", "object3")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("recipient", "recipient2")
+ .AddStringProperty("content", "content2")
+ .AddStringProperty("note", "note2")
+ .Build();
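+  // EmailMessage inherits from both Email and Message, so it carries all five
+  // properties declared across the two parent types.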
- {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir()));
- ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema()));
-
- // Puts a second document into DocumentStore but doesn't index it.
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
- schema_store.get()));
- ICING_EXPECT_OK(document_store->Put(document2));
- }
+ ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk());
- IcingSearchEngine icing(GetDefaultIcingOptions());
- // Index Restoration should be triggered here and document2 should be
- // indexed.
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ // Add type property masks for Object, which should apply to Email, Message
+ // and EmailMessage.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Object");
+ email_type_property_mask->add_paths("objectId");
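+  // Because type property masks apply polymorphically, the single "Object"
+  // mask above is expected to act as if each subtype had its own mask, e.g.
+  // (sketch of a hypothetical explicit equivalent):
+  //   TypePropertyMask* mask = result_spec.add_type_property_masks();
+  //   mask->set_schema_type("EmailMessage");
+  //   mask->add_paths("objectId");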
+ // Verify that all the documents only contain the 'objectId' property.
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
-
- // DocumentStore kept the additional document
- EXPECT_THAT(icing.Get("namespace", "uri1"),
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("objectId", "object1")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
EqualsProto(expected_get_result_proto));
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(icing.Get("namespace", "uri2"),
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("objectId", "object2")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
EqualsProto(expected_get_result_proto));
- // We indexed the additional document
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Pretend we lost the entire index
- EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively(
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- // Check that our index is ok by searching over the restored index
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Pretend index is corrupted
- const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
- ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
-
- // Check that our index is ok by searching over the restored index
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("objectId", "object3")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri3", result_spec),
+ EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3
+ // Creates 3 test documents
DocumentProto document1 =
DocumentBuilder()
.SetKey("namespace", "uri/1")
.SetSchema("Message")
.AddStringProperty("body", "message1")
- .SetScore(1)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
DocumentProto document2 =
@@ -2588,354 +848,274 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
.SetKey("namespace", "uri/2")
.SetSchema("Message")
.AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
-
- // "m" will match all 3 documents
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Report usage for doc1 and doc2. The newer timestamp 5000 shouldn't be
+  // overridden by the older timestamp 1000. The order will be doc1 > doc2 when
+  // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP.
+ UsageReport usage_report_doc1_time1 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc1_time5 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2_time3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2_time3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time1).status(), ProtoIsOk());
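+  // Note the deliberate submission order above: the report with the older
+  // timestamp (1000) arrives last, so the store should keep the newer value
+  // for doc1, i.e. max(5000, 1000) = 5000.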
+
+ // "m" will match both documents
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
search_spec.set_query("m");
- // Result should be in descending score order
+ // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
+ document1;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Creates 3 documents and ensures the relationship of them is:
- // document1 < document2 < document3
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(1571111111111)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(1572222222222)
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(1573333333333)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
+TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) {
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ } // Destructing calls a PersistToDisk(FULL)
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ // There should be no recovery since everything should be saved properly.
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Schema is still intact.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
-
- // Results should not be ranked by score but returned in reverse insertion
- // order.
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
-}
+ EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- // Creates 3 documents and ensures the relationship in terms of creation
- // timestamp score is: document1 < document2 < document3
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(1571111111111)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(1572222222222)
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(1573333333333)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
-
- // "m" will match all 3 documents
+ // Documents are still intact.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Index is still intact.
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
+ search_spec.set_query("message"); // Content in the Message document.
- // Result should be in descending timestamp order
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
+ document;
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) {
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
+ // Add schema and documents to our first icing1 instance.
+ IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+  // Initialize a second icing2 instance which should have its own memory
+ // space. If data from icing1 isn't being persisted to the files, then icing2
+ // won't be able to see those changes.
+ IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
+
+ // There should be no recovery since everything should be saved properly.
+ InitializeResultProto init_result = icing2.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Schema is still intact.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing2.GetSchema(),
+ EqualsProto(expected_get_schema_result_proto));
- // "m" will match all 3 documents
+ // Documents are still intact.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ EXPECT_THAT(
+ icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Index is still intact.
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
+ search_spec.set_query("message"); // Content in the Message document.
- // Result should be in ascending score order
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
+ document;
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing2.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest,
- SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
+TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) {
+ IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing1.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document));
+
+ // It's intentional that no PersistToDisk call is made before initializing a
+ // second instance of icing.
+
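+  // (icing1 is still alive here, so any document-log bytes already on disk
+  // may be unflushed or lack a valid checksum; that is why a data loss is
+  // expected below.)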
+ IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing2.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::PARTIAL_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // The document shouldn't be found because we forgot to call
+ // PersistToDisk(LITE)!
+ EXPECT_THAT(
+ icing2.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ // Searching also shouldn't get us anything because the index wasn't
+ // recovered.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message"); // Content in the Message document.
- // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
- SchemaProto incompatible_schema = schema;
- incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
-
- // Since we don't have any documents yet, we can't detect this edge-case. But
- // it should be fine since there aren't any documents to be invalidated.
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status().code(),
- Eq(StatusProto::OK));
+ SearchResultProto actual_results =
+ icing2.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) {
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
- // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
- SchemaProto incompatible_schema = schema;
- incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
+ IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing1.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document));
+
+ IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing2.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+  // A checksum mismatch gets reported as an IO error. PersistToDisk(LITE) only
+  // flushes the ground truth, not the derived files of the document store and
+  // index, so reinitializing will trigger a checksum mismatch for both.
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
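+  // Contrast this with the PersistToDisk(FULL) tests above, where both of
+  // these recovery causes are expected to be InitializeStatsProto::NONE
+  // because FULL also flushes the derived files.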
+
+ // Schema is still intact.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(icing2.GetSchema(),
+ EqualsProto(expected_get_schema_result_proto));
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
-
- // Can retrieve by namespace/uri
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document;
-
- ASSERT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
-
- // Can search for it
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
-
- // Setting the new, different schema will remove incompatible documents
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status().code(),
- Eq(StatusProto::OK));
-
- // Can't retrieve by namespace/uri
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
-
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
+ // The document should be found because we called PersistToDisk(LITE)!
+ EXPECT_THAT(
+ icing2.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document));
- // Can't search for it
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
-}
+ // Recovered index is still intact.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message"); // Content in the Message document.
-TEST_F(IcingSearchEngineTest, PersistToDisk) {
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
- {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-
- // Persisting shouldn't affect anything
- EXPECT_THAT(icing.PersistToDisk().status().code(), Eq(StatusProto::OK));
-
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
- } // Destructing persists as well
-
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Get("namespace", "uri"),
- EqualsProto(expected_get_result_proto));
+ SearchResultProto actual_results =
+ icing2.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, ResetOk) {
@@ -2943,22 +1123,21 @@ TEST_F(IcingSearchEngineTest, ResetOk) {
SchemaProto empty_schema = SchemaProto(message_schema);
empty_schema.clear_types();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(message_schema).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(message_schema).status(), ProtoIsOk());
int64_t empty_state_size =
filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that things have been added
EXPECT_THAT(filesystem()->GetDiskUsage(GetTestBaseDir().c_str()),
Gt(empty_state_size));
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
// Check that we're back to an empty state
EXPECT_EQ(filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str()),
@@ -2967,297 +1146,38 @@ TEST_F(IcingSearchEngineTest, ResetOk) {
// Sanity check that we can still call other APIs. If things aren't cleared,
// then this should raise an error since the empty schema is incompatible with
// the old message_schema.
- EXPECT_THAT(icing.SetSchema(empty_schema).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, ResetAbortedError) {
+TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesInternalError) {
auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails IcingSearchEngine::Reset(). But since we didn't actually delete
- // anything, we'll be able to consider this just an ABORTED call.
+ // This fails IcingSearchEngine::Reset() with status code INTERNAL and leaves
+ // the IcingSearchEngine instance in an uninitialized state.
ON_CALL(*mock_filesystem,
DeleteDirectoryRecursively(StrEq(GetTestBaseDir().c_str())))
.WillByDefault(Return(false));
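+  // With DeleteDirectoryRecursively mocked to fail, Reset() cannot clear the
+  // base directory, so the INTERNAL status and the FAILED_PRECONDITION on the
+  // follow-up Get below are the expected outcome.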
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::ABORTED));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::INTERNAL));
- // Everything is still intact.
- // Can get old data.
GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->set_code(
+ StatusProto::FAILED_PRECONDITION);
*expected_get_result_proto.mutable_document() = document;
- EXPECT_THAT(icing.Get(document.namespace_(), document.uri()),
- EqualsProto(expected_get_result_proto));
-
- // Can add new data.
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest, ResetInternalError) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
-
- // Let all other calls succeed.
- EXPECT_CALL(*mock_filesystem, Write(Matcher<const char*>(_), _, _))
- .WillRepeatedly(Return(true));
-
- // This prevents IcingSearchEngine from creating a DocumentStore instance on
- // reinitialization
- const std::string document_log_path =
- GetTestBaseDir() + "/document_dir/document_log";
- EXPECT_CALL(
- *mock_filesystem,
- Write(Matcher<const char*>(StrEq(document_log_path.c_str())), _, _))
- .WillOnce(Return(true))
- .WillOnce(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetNormalization) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "mdi Zürich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("mdi Zürich");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(results.results(), SizeIs(2));
- const DocumentProto& result_document_1 = results.results(0).document();
- const SnippetProto& result_snippet_1 = results.results(0).snippet();
- EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("Zürich"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("mdi Zürich Team Meeting"));
-
- const DocumentProto& result_document_2 = results.results(1).document();
- const SnippetProto& result_snippet_2 = results.results(1).snippet();
- EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("zurich"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("MDI zurich Team Meeting"));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "mdi Zürich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("md Zür");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(results.results(), SizeIs(2));
- const DocumentProto& result_document_1 = results.results(0).document();
- const SnippetProto& result_snippet_1 = results.results(0).snippet();
- EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("Zürich"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("mdi Zürich Team Meeting"));
-
- const DocumentProto& result_document_2 = results.results(1).document();
- const SnippetProto& result_snippet_2 = results.results(1).snippet();
- EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("zurich"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("MDI zurich Team Meeting"));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status().code(),
- Eq(StatusProto::OK));
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Email")
- .AddStringProperty("subject", "MDI zurich Team Meeting")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("body:Zür");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(10);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(10);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(results.results(), SizeIs(1));
- const DocumentProto& result_document = results.results(0).document();
- const SnippetProto& result_snippet = results.results(0).snippet();
- EXPECT_THAT(result_document, EqualsProto(document_one));
- EXPECT_THAT(
- GetMatch(result_document, result_snippet, "body", /*snippet_index=*/0),
- Eq("zurich"));
- EXPECT_THAT(
- GetWindow(result_document, result_snippet, "body", /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(
- GetMatch(result_document, result_snippet, "subject", /*snippet_index=*/0),
- IsEmpty());
- EXPECT_THAT(GetWindow(result_document, result_snippet, "subject",
- /*snippet_index=*/0),
- IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
-
- SchemaProto email_schema = CreateMessageSchema();
- EXPECT_THAT(icing.SetSchema(email_schema).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchema().status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(
- icing.GetSchemaType(email_schema.types(0).schema_type()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
-
- DocumentProto doc = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(doc).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Get(doc.namespace_(), doc.uri()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
- .status()
- .code(),
- Eq(StatusProto::FAILED_PRECONDITION));
-
- SearchSpecProto search_spec = SearchSpecProto::default_instance();
- ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
- ResultSpecProto result_spec = ResultSpecProto::default_instance();
- EXPECT_THAT(
- icing.Search(search_spec, scoring_spec, result_spec).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- constexpr int kSomePageToken = 12;
- EXPECT_THAT(icing.GetNextPage(kSomePageToken).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
-
- EXPECT_THAT(icing.PersistToDisk().status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Optimize().status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(document.namespace_(), document.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
}
TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
@@ -3298,47 +1218,43 @@ TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// No namespaces exist yet
GetAllNamespacesResultProto result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(), IsEmpty());
- ASSERT_THAT(icing.Put(namespace1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(namespace2_uri1).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(namespace2_uri2).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(namespace3).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(namespace1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(namespace2_uri1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(namespace2_uri2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(namespace3).status(), ProtoIsOk());
// All namespaces should exist now
result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(),
UnorderedElementsAre("namespace1", "namespace2", "namespace3"));
// After deleting namespace2_uri1 document, we still have namespace2_uri2 in
// "namespace2" so it should still show up
- ASSERT_THAT(icing.Delete("namespace2", "uri1").status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Delete("namespace2", "uri1").status(), ProtoIsOk());
result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(),
UnorderedElementsAre("namespace1", "namespace2", "namespace3"));
// After deleting namespace2_uri2 document, we no longer have any documents
// in "namespace2"
- ASSERT_THAT(icing.Delete("namespace2", "uri2").status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Delete("namespace2", "uri2").status(), ProtoIsOk());
result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(),
UnorderedElementsAre("namespace1", "namespace3"));
}
@@ -3352,16 +1268,112 @@ TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// Only valid document left is the one in "namespace1"
GetAllNamespacesResultProto result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(), UnorderedElementsAre("namespace1"));
}
}
+TEST_F(IcingSearchEngineTest, StorageInfoTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create three documents.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Ensure that total_storage_size is set. All the other stats are covered by
+ // the classes that generate them.
+ StorageInfoResultProto result = icing.GetStorageInfo();
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0));
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityBasicSucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create a document.
+ DocumentProto document = CreateMessageDocument("namespace", "email");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ DebugInfoResultProto result = icing.GetDebugInfo(DebugInfoVerbosity::BASIC);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+
+ // Some sanity checks
+ DebugInfoProto debug_info = result.debug_info();
+ EXPECT_THAT(
+ debug_info.document_info().document_storage_info().num_alive_documents(),
+ Eq(1));
+ EXPECT_THAT(debug_info.document_info().corpus_info(),
+ IsEmpty()); // because verbosity=BASIC
+ EXPECT_THAT(debug_info.schema_info().crc(), Gt(0));
+}
+
+TEST_F(IcingSearchEngineTest,
+ GetDebugInfoVerbosityDetailedSucceedsWithCorpusInfo) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create 4 documents.
+ DocumentProto document1 = CreateMessageDocument("namespace1", "email/1");
+ DocumentProto document2 = CreateMessageDocument("namespace1", "email/2");
+ DocumentProto document3 = CreateMessageDocument("namespace2", "email/3");
+ DocumentProto document4 = CreateMessageDocument("namespace2", "email/4");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ EXPECT_THAT(result.status(), ProtoIsOk());
+
+ // Some sanity checks
+ DebugInfoProto debug_info = result.debug_info();
+ EXPECT_THAT(
+ debug_info.document_info().document_storage_info().num_alive_documents(),
+ Eq(4));
+ EXPECT_THAT(debug_info.document_info().corpus_info(), SizeIs(2));
+ EXPECT_THAT(debug_info.schema_info().crc(), Gt(0));
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoUninitialized) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoNoSchemaNoDocumentsSucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ ASSERT_THAT(result.status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ DebugInfoResultProto result =
+ icing.GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ ASSERT_THAT(result.status(), ProtoIsOk());
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/data-indexing-handler.h b/icing/index/data-indexing-handler.h
new file mode 100644
index 0000000..16a1796
--- /dev/null
+++ b/icing/index/data-indexing-handler.h
@@ -0,0 +1,69 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_
+#define ICING_INDEX_DATA_INDEXING_HANDLER_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+// Parent class for indexing different types of data in TokenizedDocument.
+class DataIndexingHandler {
+ public:
+ explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {}
+
+ virtual ~DataIndexingHandler() = default;
+
+  // Handles the indexing process: adds data into the specific type of index
+  // (e.g. term index, integer index, qualified id type joinable index) for
+  // all contents of the corresponding data type in tokenized_document.
+ // For example, IntegerSectionIndexingHandler::Handle should add data into
+ // integer index for all contents in tokenized_document.integer_sections.
+ //
+  // It should also handle the last added DocumentId properly (based on
+  // recovery_mode_) to avoid re-adding previously indexed documents.
+ //
+ // tokenized_document: document object with different types of tokenized data.
+ // document_id: id of the document.
+ // recovery_mode: decides how to handle document_id <=
+ // last_added_document_id. If in recovery_mode, then
+ // Handle() will simply return OK immediately. Otherwise,
+ // returns INVALID_ARGUMENT_ERROR.
+ // put_document_stats: object for collecting stats during indexing. It can be
+ // nullptr.
+ //
+  // Returns:
+ // - OK on success.
+ // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
+ // than or equal to the document_id of a previously indexed document in
+  //     non-recovery mode.
+ // - Any other errors. It depends on each implementation.
+ virtual libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) = 0;
+
+ protected:
+ const Clock& clock_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_DATA_INDEXING_HANDLER_H_
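
To make the contract above concrete, here is a minimal sketch of a subclass.
The class name, the last_added_document_id_ bookkeeping, and the elided index
write are hypothetical illustrations, not part of this change; the sketch also
assumes icing/absl_ports/canonical_errors.h for the error helpers.

    // Illustrative sketch only: a hypothetical handler following the
    // documented Handle() contract.
    class SketchIndexingHandler : public DataIndexingHandler {
     public:
      explicit SketchIndexingHandler(const Clock* clock)
          : DataIndexingHandler(clock) {}

      libtextclassifier3::Status Handle(
          const TokenizedDocument& tokenized_document, DocumentId document_id,
          bool recovery_mode,
          PutDocumentStatsProto* put_document_stats) override {
        if (!IsDocumentIdValid(document_id)) {
          return absl_ports::InvalidArgumentError("Invalid DocumentId");
        }
        if (last_added_document_id_ != kInvalidDocumentId &&
            document_id <= last_added_document_id_) {
          // In recovery mode this document was already indexed; skip it.
          if (recovery_mode) {
            return libtextclassifier3::Status::OK;
          }
          return absl_ports::InvalidArgumentError(
              "DocumentId must be greater than the last added document_id");
        }
        last_added_document_id_ = document_id;
        // ... write tokenized_document contents into the backing index ...
        return libtextclassifier3::Status::OK;
      }

     private:
      DocumentId last_added_document_id_ = kInvalidDocumentId;
    };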
diff --git a/icing/index/hit/doc-hit-info.cc b/icing/index/hit/doc-hit-info.cc
deleted file mode 100644
index 80dbbde..0000000
--- a/icing/index/hit/doc-hit-info.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/hit/doc-hit-info.h"
-
-#include "icing/legacy/core/icing-string-util.h"
-
-namespace icing {
-namespace lib {
-
-bool DocHitInfo::operator<(const DocHitInfo& other) const {
- if (document_id() != other.document_id()) {
- // Sort by document_id descending. This mirrors how the individual hits that
- // are collapsed into this DocHitInfo would sort with other hits -
- // document_ids are inverted when encoded in hits. Hits are encoded this way
- // because they are appended to posting lists and the most recent value
- // appended to a posting list must have the smallest encoded value of any
- // hit on the posting list.
- return document_id() > other.document_id();
- }
- if (hit_section_ids_mask() != other.hit_section_ids_mask()) {
- return hit_section_ids_mask() < other.hit_section_ids_mask();
- }
- // Doesn't matter which way we compare this array, as long as
- // DocHitInfo is unequal when it is unequal.
- return memcmp(max_hit_score_, other.max_hit_score_, sizeof(max_hit_score_)) <
- 0;
-}
-
-void DocHitInfo::UpdateSection(SectionId section_id, Hit::Score hit_score) {
- SectionIdMask section_id_mask = (1u << section_id);
- if (hit_section_ids_mask() & section_id_mask) {
- max_hit_score_[section_id] =
- std::max(max_hit_score_[section_id], hit_score);
- } else {
- max_hit_score_[section_id] = hit_score;
- hit_section_ids_mask_ |= section_id_mask;
- }
-}
-
-void DocHitInfo::MergeSectionsFrom(const DocHitInfo& other) {
- SectionIdMask other_mask = other.hit_section_ids_mask();
- while (other_mask) {
- SectionId section_id = __builtin_ctz(other_mask);
- UpdateSection(section_id, other.max_hit_score(section_id));
- other_mask &= ~(1u << section_id);
- }
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h
index 386822d..2770de2 100644
--- a/icing/index/hit/doc-hit-info.h
+++ b/icing/index/hit/doc-hit-info.h
@@ -25,19 +25,16 @@
namespace icing {
namespace lib {
-// DocHitInfo provides a collapsed view of all hits for a specific term and doc.
-// Hits contain a document_id, section_id and a hit score. The information in
-// multiple hits is collapse into a DocHitInfo by providing a SectionIdMask of
-// all sections that contained a hit for this term as well as the highest hit
-// score of any hit for each section.
+// DocHitInfo provides a collapsed view of all hits for a specific doc.
+// Hits contain a document_id and section_id. The information in multiple hits
+// is collapsed into a DocHitInfo by providing a SectionIdMask of all sections
+// that contained a hit for this term.
class DocHitInfo {
public:
explicit DocHitInfo(DocumentId document_id_in = kInvalidDocumentId,
SectionIdMask hit_section_ids_mask = kSectionIdMaskNone)
: document_id_(document_id_in),
- hit_section_ids_mask_(hit_section_ids_mask) {
- memset(max_hit_score_, Hit::kMaxHitScore, sizeof(max_hit_score_));
- }
+ hit_section_ids_mask_(hit_section_ids_mask) {}
DocumentId document_id() const { return document_id_; }
@@ -49,38 +46,44 @@ class DocHitInfo {
hit_section_ids_mask_ = section_id_mask;
}
- Hit::Score max_hit_score(SectionId section_id) const {
- return max_hit_score_[section_id];
+ bool operator<(const DocHitInfo& other) const {
+ if (document_id() != other.document_id()) {
+ // Sort by document_id descending. This mirrors how the individual hits
+ // that are collapsed into this DocHitInfo would sort with other hits -
+ // document_ids are inverted when encoded in hits. Hits are encoded this
+ // way because they are appended to posting lists and the most recent
+ // value appended to a posting list must have the smallest encoded value
+ // of any hit on the posting list.
+ return document_id() > other.document_id();
+ }
+ return hit_section_ids_mask() < other.hit_section_ids_mask();
}
-
- bool operator<(const DocHitInfo& other) const;
bool operator==(const DocHitInfo& other) const {
- return (*this < other) == (other < *this);
+ return document_id_ == other.document_id_ &&
+ hit_section_ids_mask_ == other.hit_section_ids_mask_;
}
- // Updates the hit_section_ids_mask and max_hit_score for the section, if
- // necessary.
- void UpdateSection(SectionId section_id, Hit::Score hit_score);
+ // Updates the hit_section_ids_mask for the section, if necessary.
+ void UpdateSection(SectionId section_id) {
+ hit_section_ids_mask_ |= (UINT64_C(1) << section_id);
+ }
- // Merges the sections of other into this. The hit_section_ids_masks are or'd
- // and the max hit score for each section between the two is set.
+ // Merges the sections of other into this. The hit_section_ids_masks are or'd.
//
// This does not affect the DocumentId of this or other. If callers care about
// only merging sections for DocHitInfos with the same DocumentId, callers
// should check this themselves.
- void MergeSectionsFrom(const DocHitInfo& other);
+ void MergeSectionsFrom(const SectionIdMask& other_hit_section_ids_mask) {
+ hit_section_ids_mask_ |= other_hit_section_ids_mask;
+ }
private:
DocumentId document_id_;
SectionIdMask hit_section_ids_mask_;
- Hit::Score max_hit_score_[kMaxSectionId + 1];
} __attribute__((packed));
-static_assert(sizeof(DocHitInfo) == 22, "");
+static_assert(sizeof(DocHitInfo) == 12, "");
// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
static_assert(icing_is_packed_pod<DocHitInfo>::value, "go/icing-ubsan");
-static_assert(sizeof(Hit::Score) == 1,
- "Change how max_hit_score_ is initialized if changing the type "
- "of Hit::Score");
} // namespace lib
} // namespace icing
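
A short usage sketch of the mask-based API above; the document and section ids
are arbitrary illustrative values.

    // Illustrative usage of the mask-based DocHitInfo API.
    DocHitInfo info(/*document_id_in=*/5);
    info.UpdateSection(/*section_id=*/3);   // mask is now 0b1000
    info.UpdateSection(/*section_id=*/0);   // mask is now 0b1001

    DocHitInfo other(/*document_id_in=*/5);
    other.UpdateSection(/*section_id=*/1);  // other's mask is 0b0010

    // Masks are simply or'd together; neither document_id is touched.
    info.MergeSectionsFrom(other.hit_section_ids_mask());
    // info.hit_section_ids_mask() is now 0b1011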
diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc
index d8adbc1..13eca9a 100644
--- a/icing/index/hit/doc-hit-info_test.cc
+++ b/icing/index/hit/doc-hit-info_test.cc
@@ -14,143 +14,29 @@
#include "icing/index/hit/doc-hit-info.h"
-#include "icing/index/hit/hit.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
namespace icing {
namespace lib {
using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::IsTrue;
using ::testing::Ne;
-constexpr DocumentId kSomeDocumentId = 12;
-constexpr DocumentId kSomeOtherDocumentId = 54;
-
-TEST(DocHitInfoTest, InitialMaxHitScores) {
- DocHitInfo info(kSomeDocumentId);
- for (SectionId i = 0; i <= kMaxSectionId; ++i) {
- EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kMaxHitScore));
- }
-}
-
-TEST(DocHitInfoTest, UpdateHitScores) {
- DocHitInfo info(kSomeDocumentId);
- ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
-
- // Updating a section for the first time, should change its max hit score,
- // even though the hit score (16) may be lower than the current value returned
- // by info.max_hit_score(3) (kMaxHitScore)
- info.UpdateSection(3, 16);
- EXPECT_THAT(info.max_hit_score(3), Eq(16));
-
- // Updating a section with a hit score lower than the previously set one
- // should not update max hit score.
- info.UpdateSection(3, 15);
- EXPECT_THAT(info.max_hit_score(3), Eq(16));
-
- // Updating a section with a hit score higher than the previously set one
- // should update the max hit score.
- info.UpdateSection(3, 17);
- EXPECT_THAT(info.max_hit_score(3), Eq(17));
-
- // Updating a section with kMaxHitScore should *always* set the max hit
- // score to kMaxHitScore (regardless of what value kMaxHitScore is
- // defined with).
- info.UpdateSection(3, Hit::kMaxHitScore);
- EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
-
- // Updating a section that has had kMaxHitScore explicitly set, should
- // *never* change the max hit score (regardless of what value kMaxHitScore
- // is defined with).
- info.UpdateSection(3, 16);
- EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
-}
-
-TEST(DocHitInfoTest, UpdateSectionIdMask) {
- DocHitInfo info(kSomeDocumentId);
- EXPECT_THAT(info.hit_section_ids_mask(), Eq(kSectionIdMaskNone));
-
- info.UpdateSection(3, 16);
- EXPECT_THAT(info.hit_section_ids_mask() & 1U << 3, IsTrue());
-
- // Calling update again shouldn't do anything
- info.UpdateSection(3, 15);
- EXPECT_THAT(info.hit_section_ids_mask() & 1U << 3, IsTrue());
-
- // Updating another section shouldn't do anything
- info.UpdateSection(2, 77);
- EXPECT_THAT(info.hit_section_ids_mask() & 1U << 3, IsTrue());
-}
-
-TEST(DocHitInfoTest, MergeSectionsFromDifferentDocumentId) {
- // Merging infos with different document_ids works.
- DocHitInfo info1(kSomeDocumentId);
- DocHitInfo info2(kSomeOtherDocumentId);
- info2.UpdateSection(7, 12);
- info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(7), Eq(12));
- EXPECT_THAT(info1.document_id(), Eq(kSomeDocumentId));
-}
-
-TEST(DocHitInfoTest, MergeSectionsFromKeepsOldSection) {
- // Merging shouldn't override sections that are present info1, but not present
- // in info2.
- DocHitInfo info1(kSomeDocumentId);
- info1.UpdateSection(3, 16);
- DocHitInfo info2(kSomeDocumentId);
- info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(3), Eq(16));
-}
-
-TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) {
- // Merging should add sections that were not present in info1, but are present
- // in info2.
- DocHitInfo info1(kSomeDocumentId);
- DocHitInfo info2(kSomeDocumentId);
- info2.UpdateSection(7, 12);
- info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(7), Eq(12));
-}
-
-TEST(DocHitInfoTest, MergeSectionsFromSetsHigherHitScore) {
- // Merging should override the value of a section in info1 if the same section
- // is present in info2 with a higher hit score.
- DocHitInfo info1(kSomeDocumentId);
- info1.UpdateSection(2, 77);
- DocHitInfo info2(kSomeDocumentId);
- info2.UpdateSection(2, 89);
- info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(2), Eq(89));
-}
-
-TEST(DocHitInfoTest, MergeSectionsFromDoesNotSetLowerHitScore) {
- // Merging should not override the hit score of a section in info1 if the same
- // section is present in info2 but with a lower hit score.
- DocHitInfo info1(kSomeDocumentId);
- info1.UpdateSection(5, 108);
- DocHitInfo info2(kSomeDocumentId);
- info2.UpdateSection(5, 13);
- info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(5), Eq(108));
-}
-
TEST(DocHitInfoTest, Comparison) {
constexpr DocumentId kDocumentId = 1;
DocHitInfo info(kDocumentId);
- info.UpdateSection(1, 12);
+ info.UpdateSection(1);
constexpr DocumentId kHighDocumentId = 15;
DocHitInfo high_document_id_info(kHighDocumentId);
- high_document_id_info.UpdateSection(1, 12);
+ high_document_id_info.UpdateSection(1);
DocHitInfo high_section_id_info(kDocumentId);
- high_section_id_info.UpdateSection(1, 12);
- high_section_id_info.UpdateSection(6, Hit::kMaxHitScore);
+ high_section_id_info.UpdateSection(1);
+ high_section_id_info.UpdateSection(6);
std::vector<DocHitInfo> infos{info, high_document_id_info,
high_section_id_info};
@@ -160,10 +46,10 @@ TEST(DocHitInfoTest, Comparison) {
// There are no requirements for how DocHitInfos with the same DocumentIds and
// hit masks will compare, but they must not be equal.
- DocHitInfo different_hit_score_info(kDocumentId);
- different_hit_score_info.UpdateSection(1, 76);
- EXPECT_THAT(info < different_hit_score_info,
- Ne(different_hit_score_info < info));
+ DocHitInfo different_term_frequency_info(kDocumentId);
+ different_term_frequency_info.UpdateSection(2);
+ EXPECT_THAT(info < different_term_frequency_info,
+ Ne(different_term_frequency_info < info));
}
} // namespace lib
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
index 1852bd5..493e62b 100644
--- a/icing/index/hit/hit.cc
+++ b/icing/index/hit/hit.cc
@@ -30,13 +30,25 @@ enum FlagOffset {
// This hit represents a prefix of a longer term. If exact matches are
// required, then this hit should be ignored.
kPrefixHit = 1,
- // Whether or not the hit has a hit score other than kMaxHitScore.
- kHasScore = 2,
+ // Whether or not the hit has a term_frequency other than
+ // kDefaultTermFrequency.
+ kHasTermFrequency = 2,
kNumFlags = 3,
};
+
+static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <
+ sizeof(Hit::Value) * 8,
+              "Hit::kInvalidValue contains a risky value and we should have "
+ "least one unused bit to avoid potential bugs. Please follow the "
+ "process mentioned in hit.h to correct the value of "
+ "Hit::kInvalidValue and remove this static_assert afterwards.");
+
static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <=
sizeof(Hit::Value) * 8,
"HitOverflow");
+static_assert(kDocumentIdBits == 22, "");
+static_assert(kSectionIdBits == 6, "");
+static_assert(kNumFlags == 3, "");
inline DocumentId InvertDocumentId(DocumentId document_id) {
static_assert(kMaxDocumentId <= (std::numeric_limits<DocumentId>::max() - 1),
@@ -51,9 +63,35 @@ inline DocumentId InvertDocumentId(DocumentId document_id) {
} // namespace
-Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
- bool is_in_prefix_section, bool is_prefix_hit)
- : score_(score) {
+BasicHit::BasicHit(SectionId section_id, DocumentId document_id) {
+ // Values are stored so that when sorted, they appear in document_id
+ // descending, section_id ascending, order. So inverted document_id appears in
+ // the most significant bits, followed by (uninverted) section_id.
+ Value temp_value = 0;
+ bit_util::BitfieldSet(/*new_value=*/InvertDocumentId(document_id),
+ /*lsb_offset=*/kSectionIdBits, /*len=*/kDocumentIdBits,
+ /*value_out=*/&temp_value);
+ bit_util::BitfieldSet(/*new_value=*/section_id, /*lsb_offset=*/0,
+ /*len=*/kSectionIdBits, /*value_out=*/&temp_value);
+ value_ = temp_value;
+}
+
+DocumentId BasicHit::document_id() const {
+ DocumentId inverted_document_id = bit_util::BitfieldGet(
+ value_, /*lsb_offset=*/kSectionIdBits, /*len=*/kDocumentIdBits);
+ // Undo the document_id inversion.
+ return InvertDocumentId(inverted_document_id);
+}
+
+SectionId BasicHit::section_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/0,
+ /*len=*/kSectionIdBits);
+}
+
+Hit::Hit(SectionId section_id, DocumentId document_id,
+ Hit::TermFrequency term_frequency, bool is_in_prefix_section,
+ bool is_prefix_hit)
+ : term_frequency_(term_frequency) {
// Values are stored so that when sorted, they appear in document_id
// descending, section_id ascending, order. Also, all else being
// equal, non-prefix hits sort before prefix hits. So inverted
@@ -64,9 +102,11 @@ Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
kSectionIdBits + kNumFlags, kDocumentIdBits,
&temp_value);
bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
- bit_util::BitfieldSet(score != kMaxHitScore, kHasScore, 1, &temp_value);
- bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value);
- bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value);
+ bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
+ kHasTermFrequency, /*len=*/1, &temp_value);
+ bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value);
+ bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection,
+ /*len=*/1, &temp_value);
value_ = temp_value;
}
@@ -81,8 +121,8 @@ SectionId Hit::section_id() const {
return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits);
}
-bool Hit::has_score() const {
- return bit_util::BitfieldGet(value(), kHasScore, 1);
+bool Hit::has_term_frequency() const {
+ return bit_util::BitfieldGet(value(), kHasTermFrequency, 1);
}
bool Hit::is_prefix_hit() const {
@@ -93,6 +133,11 @@ bool Hit::is_in_prefix_section() const {
return bit_util::BitfieldGet(value(), kInPrefixSection, 1);
}
+Hit Hit::TranslateHit(Hit old_hit, DocumentId new_document_id) {
+ return Hit(old_hit.section_id(), new_document_id, old_hit.term_frequency(),
+ old_hit.is_in_prefix_section(), old_hit.is_prefix_hit());
+}
+
bool Hit::EqualsDocumentIdAndSectionId::operator()(const Hit& hit1,
const Hit& hit2) const {
return (hit1.value() >> kNumFlags) == (hit2.value() >> kNumFlags);
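
As a standalone illustration of the inverted-document_id layout described
above: the constants below restate kDocumentIdBits/kSectionIdBits for the
sketch only and are not new definitions.

    #include <cstdint>

    // Sketch of the BasicHit value layout: 4 unused bits, then the inverted
    // 22-bit document_id, then the 6-bit section_id.
    constexpr uint32_t kSketchDocumentIdBits = 22;
    constexpr uint32_t kSketchSectionIdBits = 6;
    // The -2 mirrors kMaxDocumentId: valid inverted ids are then never 0.
    constexpr uint32_t kSketchMaxDocumentId =
        (uint32_t{1} << kSketchDocumentIdBits) - 2;

    constexpr uint32_t EncodeBasicHit(uint32_t document_id,
                                      uint32_t section_id) {
      // Higher document_ids invert to smaller values, so sorting encoded
      // values ascending yields document_id-descending order.
      uint32_t inverted_document_id = kSketchMaxDocumentId - document_id + 1;
      return (inverted_document_id << kSketchSectionIdBits) | section_id;
    }

    // A hit in a newer (higher-id) document encodes to a smaller value.
    static_assert(EncodeBasicHit(/*document_id=*/100, /*section_id=*/0) <
                  EncodeBasicHit(/*document_id=*/10, /*section_id=*/0));
    // Within one document, section_ids sort ascending.
    static_assert(EncodeBasicHit(/*document_id=*/100, /*section_id=*/1) <
                  EncodeBasicHit(/*document_id=*/100, /*section_id=*/2));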
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
index d1be204..111b320 100644
--- a/icing/index/hit/hit.h
+++ b/icing/index/hit/hit.h
@@ -15,6 +15,7 @@
#ifndef ICING_INDEX_HIT_HIT_H_
#define ICING_INDEX_HIT_HIT_H_
+#include <array>
#include <cstdint>
#include <limits>
@@ -25,56 +26,131 @@
namespace icing {
namespace lib {
+// BasicHit is a specific encoding that refers to content within a document. A
+// basic hit consists of:
+// - a DocumentId
+// - a SectionId
+// referring to the document and section that the hit corresponds to.
+//
+// The hit is the most basic unit of the index and, when grouped together by
+// term, can be used to encode what terms appear in what documents.
+//
+// BasicHit is for indices (e.g. numeric index) that don't require term
+// frequency.
+class BasicHit {
+ public:
+ // The datatype used to encode BasicHit information: the document_id and
+ // section_id.
+ using Value = uint32_t;
+
+ // WARNING: Changing this value will invalidate any pre-existing posting lists
+ // on user devices.
+ //
+ // kInvalidValue contains:
+ // - 0 for unused bits. Note that unused bits are always 0 for both valid and
+ // invalid BasicHit values.
+ // - Inverted kInvalidDocumentId
+ // - SectionId 0 (valid), which is ok because inverted kInvalidDocumentId has
+ // already invalidated the value. In fact, we currently use all 2^6 section
+ // ids and there is no "invalid section id", so it doesn't matter what
+ // SectionId we set for kInvalidValue.
+ static constexpr Value kInvalidValue = 0;
+
+ explicit BasicHit(SectionId section_id, DocumentId document_id);
+
+ explicit BasicHit() : value_(kInvalidValue) {}
+
+ bool is_valid() const { return value_ != kInvalidValue; }
+ Value value() const { return value_; }
+ DocumentId document_id() const;
+ SectionId section_id() const;
+
+ bool operator<(const BasicHit& h2) const { return value_ < h2.value_; }
+ bool operator==(const BasicHit& h2) const { return value_ == h2.value_; }
+
+ private:
+ // Value bits layout: 4 unused + 22 document_id + 6 section id.
+ Value value_;
+} __attribute__((packed));
+static_assert(sizeof(BasicHit) == 4, "");
+
// Hit is a specific encoding that refers to content within a document. A hit
// consists of:
// - a DocumentId
// - a SectionId
// referring to the document and section that the hit corresponds to, as well as
// metadata about the hit:
-// - whether the Hit has a Score other than the default value
+// - whether the Hit has a TermFrequency other than the default value
// - whether the Hit does not appear exactly in the document, but instead
// represents a term that is a prefix of a term in the document
// - whether the Hit came from a section that has prefix expansion enabled
-// and a score for the hit. Ranging from [0,255] a higher score indicates a
-// higher quality hit.
+// and a term frequency for the hit.
+//
// The hit is the most basic unit of the index and, when grouped together by
// term, can be used to encode what terms appear in what documents.
class Hit {
public:
// The datatype used to encode Hit information: the document_id, section_id
- // and the has_score, prefix hit and in prefix section flags.
+ // and the has_term_frequency, prefix hit and in prefix section flags.
using Value = uint32_t;
// WARNING: Changing this value will invalidate any pre-existing posting lists
// on user devices.
+ //
+ // WARNING:
+ // - Hit::kInvalidValue should contain inverted kInvalidDocumentId, which is
+ // b'00...0. However, currently we set it as UINT32_MAX and actually it
+ // contains b'11...1, which is the inverted document_id 0.
+ // - It means Hit::kInvalidValue contains valid (document_id, section_id,
+  // - This means Hit::kInvalidValue contains a valid (document_id, section_id,
+  //   flags) combination, so we potentially cannot distinguish whether a Hit
+  //   is invalid or not. Invalidity is an essential feature for posting lists,
+  //   since we use it to determine the state of a posting list.
+ // unused bit(s) are set as 1 for Hit::kInvalidValue and 0 for all valid
+ // Hits. In other words, the unused bit(s) are actually serving as "invalid
+ // flag".
+ // - If we want to exhaust all unused bits in the future, then we have to
+ // change Hit::kInvalidValue to set the inverted document_id section
+ // correctly (b'00...0, refer to BasicHit::kInvalidValue as an example).
+  // - Also, this problem is guarded by a static_assert in hit.cc. If all
+  //   unused bits are exhausted, the static_assert will detect this and fail.
+  //   We can safely remove the static_assert check after following the above
+  //   process to resolve the incorrect Hit::kInvalidValue issue.
static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
// Docs are sorted in reverse, and 0 is never used as the inverted
// DocumentId (because it is the inverse of kInvalidValue), so it is always
// the max in a descending sort.
static constexpr Value kMaxDocumentIdSortValue = 0;
- // A score reflecting the "quality" of this hit. The higher the score, the
- // higher quality the hit.
- using Score = uint8_t;
- // By default, hits are given the highest possible score.
- static constexpr Score kMaxHitScore = std::numeric_limits<Score>::max();
+ // The Term Frequency of a Hit.
+ using TermFrequency = uint8_t;
+ using TermFrequencyArray = std::array<Hit::TermFrequency, kTotalNumSections>;
+ // Max TermFrequency is 255.
+ static constexpr TermFrequency kMaxTermFrequency =
+ std::numeric_limits<TermFrequency>::max();
+ static constexpr TermFrequency kDefaultTermFrequency = 1;
+ static constexpr TermFrequency kNoTermFrequency = 0;
- explicit Hit(Value value = kInvalidValue, Score score = kMaxHitScore)
- : value_(value), score_(score) {}
- Hit(SectionId section_id, DocumentId document_id, Score score,
- bool is_in_prefix_section = false, bool is_prefix_hit = false);
+ explicit Hit(Value value = kInvalidValue,
+ TermFrequency term_frequency = kDefaultTermFrequency)
+ : value_(value), term_frequency_(term_frequency) {}
+ Hit(SectionId section_id, DocumentId document_id,
+ TermFrequency term_frequency, bool is_in_prefix_section = false,
+ bool is_prefix_hit = false);
bool is_valid() const { return value() != kInvalidValue; }
Value value() const { return value_; }
DocumentId document_id() const;
SectionId section_id() const;
- // Whether or not the hit contains a non-default score. Hits with non-default
- // score are considered to be of lower quality.
- bool has_score() const;
- Score score() const { return score_; }
+ // Whether or not the hit contains a valid term frequency.
+ bool has_term_frequency() const;
+ TermFrequency term_frequency() const { return term_frequency_; }
bool is_prefix_hit() const;
bool is_in_prefix_section() const;
+ // Creates a new hit based on old_hit but with new_document_id set.
+ static Hit TranslateHit(Hit old_hit, DocumentId new_document_id);
+
bool operator<(const Hit& h2) const { return value() < h2.value(); }
bool operator==(const Hit& h2) const { return value() == h2.value(); }
@@ -83,10 +159,10 @@ class Hit {
};
private:
- // Value and score must be in this order.
- // Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags.
+ // Value and TermFrequency must be in this order.
+ // Value bits layout: 1 unused + 22 document_id + 6 section id + 3 flags.
Value value_;
- Score score_;
+ TermFrequency term_frequency_;
} __attribute__((packed));
static_assert(sizeof(Hit) == 5, "");
// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
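
A brief sketch of how the term-frequency flag described above behaves; the ids
and frequencies are illustrative values only.

    // The kHasTermFrequency flag is set iff the supplied term frequency
    // differs from Hit::kDefaultTermFrequency (1).
    Hit default_tf_hit(/*section_id=*/2, /*document_id=*/7,
                       Hit::kDefaultTermFrequency);
    // default_tf_hit.has_term_frequency() == false
    // default_tf_hit.term_frequency() == Hit::kDefaultTermFrequency

    Hit explicit_tf_hit(/*section_id=*/2, /*document_id=*/7,
                        /*term_frequency=*/42);
    // explicit_tf_hit.has_term_frequency() == true
    // explicit_tf_hit.term_frequency() == 42

    // TranslateHit rebuilds a hit for a new document_id (e.g. after an
    // optimize pass), preserving section_id, term frequency and both flags.
    Hit translated = Hit::TranslateHit(explicit_tf_hit, /*new_document_id=*/3);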
diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc
index 17db66b..0086d91 100644
--- a/icing/index/hit/hit_test.cc
+++ b/icing/index/hit/hit_test.cc
@@ -26,6 +26,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::IsFalse;
using ::testing::IsTrue;
using ::testing::Lt;
@@ -33,46 +34,103 @@ using ::testing::Not;
static constexpr DocumentId kSomeDocumentId = 24;
static constexpr SectionId kSomeSectionid = 5;
-static constexpr Hit::Score kSomeHitScore = 57;
+static constexpr Hit::TermFrequency kSomeTermFrequency = 57;
-TEST(HitTest, HasScoreFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
- EXPECT_THAT(h1.has_score(), IsFalse());
- EXPECT_THAT(h1.score(), Eq(Hit::kMaxHitScore));
+TEST(BasicHitTest, Accessors) {
+ BasicHit h1(kSomeSectionid, kSomeDocumentId);
+ EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId));
+ EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
+}
+
+TEST(BasicHitTest, Invalid) {
+ BasicHit default_invalid;
+ EXPECT_THAT(default_invalid.is_valid(), IsFalse());
+
+ // Also make sure the invalid BasicHit contains an invalid document id.
+ EXPECT_THAT(default_invalid.document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(default_invalid.section_id(), Eq(kMinSectionId));
+}
+
+TEST(BasicHitTest, Valid) {
+ BasicHit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId);
+ EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue());
+
+ BasicHit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId);
+ EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue());
+
+ BasicHit minimum_document_id_hit(kSomeSectionid, kMinDocumentId);
+ EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue());
+
+ BasicHit minimum_section_id_hit(kMinSectionId, kSomeDocumentId);
+ EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue());
+
+ BasicHit all_maximum_hit(kMaxSectionId, kMaxDocumentId);
+ EXPECT_THAT(all_maximum_hit.is_valid(), IsTrue());
- Hit h2(kSomeSectionid, kSomeDocumentId, kSomeHitScore);
- EXPECT_THAT(h2.has_score(), IsTrue());
- EXPECT_THAT(h2.score(), Eq(kSomeHitScore));
+ BasicHit all_minimum_hit(kMinSectionId, kMinDocumentId);
+ EXPECT_THAT(all_minimum_hit.is_valid(), IsTrue());
+
+  // We use an invalid BasicHit for std::lower_bound. Verify that the value of
+  // the smallest valid BasicHit (which contains kMinSectionId, kMaxDocumentId)
+  // is >= BasicHit::kInvalidValue.
+ BasicHit smallest_hit(kMinSectionId, kMaxDocumentId);
+ ASSERT_THAT(smallest_hit.is_valid(), IsTrue());
+ EXPECT_THAT(smallest_hit.value(), Ge(BasicHit::kInvalidValue));
+}
+
+TEST(BasicHitTest, Comparison) {
+ BasicHit hit(/*section_id=*/1, /*document_id=*/243);
+  // Encoded hit values sort in ascending order, with document_ids inverted in
+  // the encoding. So a hit with a lower document_id compares greater than one
+  // with a higher document_id.
+ BasicHit higher_document_id_hit(/*section_id=*/1, /*document_id=*/2409);
+ BasicHit higher_section_id_hit(/*section_id=*/15, /*document_id=*/243);
+
+ std::vector<BasicHit> hits{hit, higher_document_id_hit,
+ higher_section_id_hit};
+ std::sort(hits.begin(), hits.end());
+ EXPECT_THAT(hits,
+ ElementsAre(higher_document_id_hit, hit, higher_section_id_hit));
+}
+
+TEST(HitTest, HasTermFrequencyFlag) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+ EXPECT_THAT(h1.has_term_frequency(), IsFalse());
+ EXPECT_THAT(h1.term_frequency(), Eq(Hit::kDefaultTermFrequency));
+
+ Hit h2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency);
+ EXPECT_THAT(h2.has_term_frequency(), IsTrue());
+ EXPECT_THAT(h2.term_frequency(), Eq(kSomeTermFrequency));
}
TEST(HitTest, IsPrefixHitFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.is_prefix_hit(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(h2.is_prefix_hit(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
EXPECT_THAT(h3.is_prefix_hit(), IsTrue());
}
TEST(HitTest, IsInPrefixSectionFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
EXPECT_THAT(h2.is_in_prefix_section(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
EXPECT_THAT(h3.is_in_prefix_section(), IsTrue());
}
TEST(HitTest, Accessors) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId));
EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
}
@@ -88,47 +146,64 @@ TEST(HitTest, Valid) {
Hit explicit_valid(kSomeValue);
EXPECT_THAT(explicit_valid.is_valid(), IsTrue());
- Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId, kSomeHitScore);
+ Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId,
+ kSomeTermFrequency);
EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue());
- Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, kSomeHitScore);
+ Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId,
+ kSomeTermFrequency);
EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue());
- Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeHitScore);
+ Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeTermFrequency);
EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue());
- Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeHitScore);
+ Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeTermFrequency);
EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue());
+
+  // We use a Hit with value Hit::kMaxDocumentIdSortValue for std::lower_bound
+  // in the lite index. Verify that the value of the smallest valid Hit (which
+  // contains kMinSectionId, kMaxDocumentId, and all 3 flags false) is >=
+  // Hit::kMaxDocumentIdSortValue.
+ Hit smallest_hit(kMinSectionId, kMaxDocumentId, Hit::kDefaultTermFrequency);
+ ASSERT_THAT(smallest_hit.is_valid(), IsTrue());
+ ASSERT_THAT(smallest_hit.has_term_frequency(), IsFalse());
+ ASSERT_THAT(smallest_hit.is_prefix_hit(), IsFalse());
+ ASSERT_THAT(smallest_hit.is_in_prefix_section(), IsFalse());
+ EXPECT_THAT(smallest_hit.value(), Ge(Hit::kMaxDocumentIdSortValue));
}
TEST(HitTest, Comparison) {
- Hit hit(1, 243, Hit::kMaxHitScore);
+ Hit hit(1, 243, Hit::kDefaultTermFrequency);
// DocumentIds are sorted in ascending order. So a hit with a lower
// document_id should be considered greater than one with a higher
// document_id.
- Hit higher_document_id_hit(1, 2409, Hit::kMaxHitScore);
- Hit higher_section_id_hit(15, 243, Hit::kMaxHitScore);
- // Whether or not a hit score was set is considered, but the score itself is
- // not.
- Hit hitscore_hit(1, 243, 12);
- Hit prefix_hit(1, 243, Hit::kMaxHitScore, /*is_in_prefix_section=*/false,
+ Hit higher_document_id_hit(1, 2409, Hit::kDefaultTermFrequency);
+ Hit higher_section_id_hit(15, 243, Hit::kDefaultTermFrequency);
+ // Whether or not a term frequency was set is considered, but the term
+ // frequency itself is not.
+ Hit term_frequency_hit(1, 243, 12);
+ Hit prefix_hit(1, 243, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
- Hit hit_in_prefix_section(1, 243, Hit::kMaxHitScore,
+ Hit hit_in_prefix_section(1, 243, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/false);
- std::vector<Hit> hits{
- hit, higher_document_id_hit, higher_section_id_hit, hitscore_hit,
- prefix_hit, hit_in_prefix_section};
+ std::vector<Hit> hits{hit,
+ higher_document_id_hit,
+ higher_section_id_hit,
+ term_frequency_hit,
+ prefix_hit,
+ hit_in_prefix_section};
std::sort(hits.begin(), hits.end());
- EXPECT_THAT(hits,
- ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
- prefix_hit, hitscore_hit, higher_section_id_hit));
-
- Hit higher_hitscore_hit(1, 243, 108);
- // Hit score value is not considered when comparing hits.
- EXPECT_THAT(hitscore_hit, Not(Lt(higher_hitscore_hit)));
- EXPECT_THAT(higher_hitscore_hit, Not(Lt(hitscore_hit)));
+ EXPECT_THAT(
+ hits, ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
+ prefix_hit, term_frequency_hit, higher_section_id_hit));
+
+ Hit higher_term_frequency_hit(1, 243, 108);
+ // The term frequency value is not considered when comparing hits.
+ EXPECT_THAT(term_frequency_hit, Not(Lt(higher_term_frequency_hit)));
+ EXPECT_THAT(higher_term_frequency_hit, Not(Lt(term_frequency_hit)));
}
} // namespace
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 7076257..9a773e8 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -14,98 +14,34 @@
#include "icing/index/index-processor.h"
-#include <cstdint>
#include <memory>
-#include <string>
-#include <string_view>
-#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/index/index.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/proto/document.pb.h"
-#include "icing/proto/schema.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/schema/section-manager.h"
-#include "icing/schema/section.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
-#include "icing/tokenization/language-segmenter.h"
-#include "icing/tokenization/token.h"
-#include "icing/tokenization/tokenizer-factory.h"
-#include "icing/tokenization/tokenizer.h"
-#include "icing/transform/normalizer.h"
#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
-libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>>
-IndexProcessor::Create(const SchemaStore* schema_store,
- const LanguageSegmenter* lang_segmenter,
- const Normalizer* normalizer, Index* index,
- const IndexProcessor::Options& options) {
- ICING_RETURN_ERROR_IF_NULL(schema_store);
- ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
- ICING_RETURN_ERROR_IF_NULL(normalizer);
- ICING_RETURN_ERROR_IF_NULL(index);
-
- return std::unique_ptr<IndexProcessor>(new IndexProcessor(
- schema_store, lang_segmenter, normalizer, index, options));
-}
-
libtextclassifier3::Status IndexProcessor::IndexDocument(
- const DocumentProto& document, DocumentId document_id) {
- if (index_->last_added_document_id() != kInvalidDocumentId &&
- document_id <= index_->last_added_document_id()) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "DocumentId %d must be greater than last added document_id %d",
- document_id, index_->last_added_document_id()));
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ for (auto& data_indexing_handler : data_indexing_handlers_) {
+ ICING_RETURN_IF_ERROR(data_indexing_handler->Handle(
+ tokenized_document, document_id, recovery_mode_, put_document_stats));
}
- ICING_ASSIGN_OR_RETURN(std::vector<Section> sections,
- schema_store_.ExtractSections(document));
- uint32_t num_tokens = 0;
- libtextclassifier3::Status overall_status;
- for (const Section& section : sections) {
- // TODO(b/152934343): pass real namespace ids in
- Index::Editor editor =
- index_->Edit(document_id, section.metadata.id,
- section.metadata.term_match_type, /*namespace_id=*/0);
- for (std::string_view subcontent : section.content) {
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer> tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- section.metadata.tokenizer, &lang_segmenter_));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr,
- tokenizer->Tokenize(subcontent));
- while (itr->Advance()) {
- if (++num_tokens > options_.max_tokens_per_document) {
- switch (options_.token_limit_behavior) {
- case Options::TokenLimitBehavior::kReturnError:
- return absl_ports::ResourceExhaustedError(
- "Max number of tokens reached!");
- case Options::TokenLimitBehavior::kSuppressError:
- return libtextclassifier3::Status::OK;
- }
- }
- std::string term = normalizer_.NormalizeTerm(itr->GetToken().text);
- // Add this term to the index. Even if adding this hit fails, we keep
- // trying to add more hits because it's possible that future hits could
- // still be added successfully. For instance if the lexicon is full, we
- // might fail to add a hit for a new term, but should still be able to
- // add hits for terms that are already in the index.
- auto status = editor.AddHit(term.c_str());
- if (overall_status.ok() && !status.ok()) {
- // If we've succeeded to add everything so far, set overall_status to
- // represent this new failure. If we've already failed, no need to
- // update the status - we're already going to return a resource
- // exhausted error.
- overall_status = status;
- }
- }
- }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
}
- return overall_status;
+
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index c3ccac3..9b96f00 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -16,84 +16,49 @@
#define ICING_INDEX_INDEX_PROCESSOR_H_
#include <cstdint>
-#include <string>
+#include <memory>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/index/index.h"
-#include "icing/proto/document.pb.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section-manager.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
-#include "icing/tokenization/language-segmenter.h"
-#include "icing/tokenization/token.h"
-#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
class IndexProcessor {
public:
- struct Options {
- int32_t max_tokens_per_document;
+ explicit IndexProcessor(std::vector<std::unique_ptr<DataIndexingHandler>>&&
+ data_indexing_handlers,
+ const Clock* clock, bool recovery_mode = false)
+ : data_indexing_handlers_(std::move(data_indexing_handlers)),
+ clock_(*clock),
+ recovery_mode_(recovery_mode) {}
- // Indicates how a document exceeding max_tokens_per_document should be
- // handled.
- enum class TokenLimitBehavior {
- // When set, the first max_tokens_per_document will be indexed. If the
- // token count exceeds max_tokens_per_document, a ResourceExhausted error
- // will be returned.
- kReturnError,
- // When set, the first max_tokens_per_document will be indexed. If the
- // token count exceeds max_tokens_per_document, OK will be returned.
- kSuppressError,
- };
- TokenLimitBehavior token_limit_behavior;
- };
-
- // Factory function to create an IndexProcessor which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created IndexProcessor instance.
+  // Adds the tokenized document to the index, associated with document_id. If
+  // the number of tokens in the document exceeds max_tokens_per_document, then
+  // only the first max_tokens_per_document will be added to the index. All
+  // tokens of length exceeding max_token_length will be shortened to
+  // max_token_length.
//
- // Returns:
- // An IndexProcessor on success
- // FAILED_PRECONDITION if any of the pointers is null.
- static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create(
- const SchemaStore* schema_store, const LanguageSegmenter* lang_segmenter,
- const Normalizer* normalizer, Index* index, const Options& options);
-
- // Add document to the index, associated with document_id. If the number of
- // tokens in the document exceeds max_tokens_per_document, then only the first
- // max_tokens_per_document will be added to the index. All tokens of length
- // exceeding max_token_length will be shortened to max_token_length.
+ // Indexing a document *may* trigger an index merge. If a merge fails, then
+ // all content in the index will be lost.
+ //
+ // If put_document_stats is present, the fields related to indexing will be
+ // populated.
//
// Returns:
- // INVALID_ARGUMENT if document_id is less than the document_id of a
- // previously indexed
- // document or tokenization fails.
- // RESOURCE_EXHAUSTED if the index is full and can't add anymore content.
- // NOT_FOUND if there is no definition for the document's schema type.
- // INTERNAL_ERROR if any other errors occur
- libtextclassifier3::Status IndexDocument(const DocumentProto& document,
- DocumentId document_id);
+ // - OK on success.
+ // - Any DataIndexingHandler errors.
+ libtextclassifier3::Status IndexDocument(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats = nullptr);
private:
- IndexProcessor(const SchemaStore* schema_store,
- const LanguageSegmenter* lang_segmenter,
- const Normalizer* normalizer, Index* index,
- const Options& options)
- : schema_store_(*schema_store),
- lang_segmenter_(*lang_segmenter),
- normalizer_(*normalizer),
- index_(index),
- options_(options) {}
-
- std::string NormalizeToken(const Token& token);
-
- const SchemaStore& schema_store_;
- const LanguageSegmenter& lang_segmenter_;
- const Normalizer& normalizer_;
- Index* const index_;
- const Options options_;
+ std::vector<std::unique_ptr<DataIndexingHandler>> data_indexing_handlers_;
+ const Clock& clock_; // Does not own.
+ bool recovery_mode_;
};
} // namespace lib
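
A minimal wiring sketch for the new constructor and IndexDocument signature
above. The handler variables and the surrounding setup (clock, tokenized
document, an enclosing function returning libtextclassifier3::Status) are
assumed and not shown.

    // Illustrative only; handler creation and error handling are elided.
    Clock clock;
    std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
    handlers.push_back(std::move(term_indexing_handler));     // assumed
    handlers.push_back(std::move(integer_indexing_handler));  // assumed

    IndexProcessor index_processor(std::move(handlers), &clock,
                                   /*recovery_mode=*/false);
    PutDocumentStatsProto put_document_stats;
    ICING_RETURN_IF_ERROR(index_processor.IndexDocument(
        tokenized_document, /*document_id=*/0, &put_document_stats));
    // put_document_stats.index_latency_ms() now covers all handlers' work.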
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 00d116f..8f5e319 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -12,32 +12,50 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
+#include "third_party/absl/flags/flag.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/term-indexing-handler.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
+#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
// Run on a Linux workstation:
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing/index:index-processor_benchmark
//
// $ blaze-bin/icing/index/index-processor_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
@@ -53,7 +71,8 @@
// $ adb push blaze-bin/icing/index/index-processor_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/index-processor_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/index-processor_benchmark
+// --benchmark_filter=all
// --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
@@ -65,6 +84,8 @@ namespace lib {
namespace {
+using ::testing::IsTrue;
+
// Creates a fake type config with 10 properties (p0 - p9)
void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
type_config->set_schema_type("Fake_Type");
@@ -74,11 +95,11 @@ void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
property->set_property_name(
IcingStringUtil::StringPrintf("p%d", i)); // p0 - p9
property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
}
}
@@ -132,10 +153,13 @@ DocumentProto CreateDocumentWithHiragana(int content_length) {
.Build();
}
-std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
+ const Filesystem& filesystem,
const std::string& index_dir) {
- Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
- return Index::Create(options, &filesystem).ValueOrDie();
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
std::unique_ptr<Normalizer> CreateNormalizer() {
@@ -145,14 +169,20 @@ std::unique_ptr<Normalizer> CreateNormalizer() {
.ValueOrDie();
}
-std::unique_ptr<SchemaStore> CreateSchemaStore() {
- Filesystem filesystem;
+std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
+ const Clock* clock,
+ const std::string& base_dir) {
+ std::string schema_store_dir = base_dir + "/schema_store_test";
+ filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+
std::unique_ptr<SchemaStore> schema_store =
- SchemaStore::Create(&filesystem, GetTestTempDir()).ValueOrDie();
+ SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
SchemaProto schema;
CreateFakeTypeConfig(schema.add_types());
- auto set_schema_status = schema_store->SetSchema(schema);
+ auto set_schema_status = schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
if (!set_schema_status.ok()) {
ICING_LOG(ERROR) << set_schema_status.status().error_message();
@@ -161,22 +191,27 @@ std::unique_ptr<SchemaStore> CreateSchemaStore() {
return schema_store;
}
-void CleanUp(const IcingFilesystem& filesystem, const std::string& index_dir) {
- filesystem.DeleteDirectoryRecursively(index_dir.c_str());
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
+CreateDataIndexingHandlers(const Clock* clock, const Normalizer* normalizer,
+ Index* index, NumericIndex<int64_t>* integer_index) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ clock, normalizer, index,
+ /*build_property_existence_metadata_hits=*/true));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(clock, integer_index));
+
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ return handlers;
}
-std::unique_ptr<IndexProcessor> CreateIndexProcessor(
- const SchemaStore* schema_store,
- const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
- Index* index) {
- IndexProcessor::Options processor_options{};
- processor_options.max_tokens_per_document = 1024 * 1024 * 10;
- processor_options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kReturnError;
-
- return IndexProcessor::Create(schema_store, language_segmenter, normalizer,
- index, processor_options)
- .ValueOrDie();
+void CleanUp(const Filesystem& filesystem, const std::string& base_dir) {
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
}
void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
@@ -186,29 +221,58 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
-
- CleanUp(filesystem, index_dir);
-
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
+
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
+
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
- std::unique_ptr<IndexProcessor> index_processor =
- CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0));
+ TokenizedDocument tokenized_document(std::move(
+ TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+ input_document)
+ .ValueOrDie()));
DocumentId document_id = 0;
for (auto _ : state) {
ICING_ASSERT_OK(
- index_processor->IndexDocument(input_document, document_id++));
+ index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithOneProperty)
->Arg(1000)
@@ -233,30 +297,59 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
-
- CleanUp(filesystem, index_dir);
-
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
+
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
+
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
- std::unique_ptr<IndexProcessor> index_processor =
- CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document =
CreateDocumentWithTenProperties(state.range(0));
+ TokenizedDocument tokenized_document(std::move(
+ TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+ input_document)
+ .ValueOrDie()));
DocumentId document_id = 0;
for (auto _ : state) {
ICING_ASSERT_OK(
- index_processor->IndexDocument(input_document, document_id++));
+ index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithTenProperties)
->Arg(1000)
@@ -281,30 +374,59 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
-
- CleanUp(filesystem, index_dir);
-
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
+
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
+
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
- std::unique_ptr<IndexProcessor> index_processor =
- CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document =
CreateDocumentWithDiacriticLetters(state.range(0));
+ TokenizedDocument tokenized_document(std::move(
+ TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+ input_document)
+ .ValueOrDie()));
DocumentId document_id = 0;
for (auto _ : state) {
ICING_ASSERT_OK(
- index_processor->IndexDocument(input_document, document_id++));
+ index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithDiacriticLetters)
->Arg(1000)
@@ -329,29 +451,58 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
-
- CleanUp(filesystem, index_dir);
-
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
+
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
+
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/true));
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
- std::unique_ptr<IndexProcessor> index_processor =
- CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document = CreateDocumentWithHiragana(state.range(0));
+ TokenizedDocument tokenized_document(std::move(
+ TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+ input_document)
+ .ValueOrDie()));
DocumentId document_id = 0;
for (auto _ : state) {
ICING_ASSERT_OK(
- index_processor->IndexDocument(input_document, document_id++));
+ index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithHiragana)
->Arg(1000)
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 8dfb9c2..3d1be68 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -19,162 +19,341 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/term-indexing-handler.h"
+#include "icing/index/term-property-id.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/random-string.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/crc32.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
-// type and property names of FakeType
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
+// Schema types.
constexpr std::string_view kFakeType = "FakeType";
+constexpr std::string_view kNestedType = "NestedType";
+
+// Indexable properties and section ids. A property's section id is determined
+// by the lexicographical order of the indexable property paths.
constexpr std::string_view kExactProperty = "exact";
+constexpr std::string_view kIndexableIntegerProperty = "indexableInteger";
constexpr std::string_view kPrefixedProperty = "prefixed";
+constexpr std::string_view kRepeatedProperty = "repeated";
+constexpr std::string_view kRfc822Property = "rfc822";
+constexpr std::string_view kSubProperty = "submessage"; // submessage.nested
+constexpr std::string_view kNestedProperty = "nested"; // submessage.nested
+// TODO(b/246964044): Remove the ifdef guard when the url-tokenizer is ready
+// for export to Android.
+#ifdef ENABLE_URL_TOKENIZER
+constexpr std::string_view kUrlExactProperty = "urlExact";
+constexpr std::string_view kUrlPrefixedProperty = "urlPrefixed";
+#endif // ENABLE_URL_TOKENIZER
+constexpr std::string_view kVerbatimExactProperty = "verbatimExact";
+constexpr std::string_view kVerbatimPrefixedProperty = "verbatimPrefixed";
+
+constexpr SectionId kExactSectionId = 0;
+constexpr SectionId kIndexableIntegerSectionId = 1;
+constexpr SectionId kPrefixedSectionId = 2;
+constexpr SectionId kRepeatedSectionId = 3;
+constexpr SectionId kRfc822SectionId = 4;
+constexpr SectionId kNestedSectionId = 5; // submessage.nested
+#ifdef ENABLE_URL_TOKENIZER
+constexpr SectionId kUrlExactSectionId = 6;
+constexpr SectionId kUrlPrefixedSectionId = 7;
+constexpr SectionId kVerbatimExactSectionId = 8;
+constexpr SectionId kVerbatimPrefixedSectionId = 9;
+#else // !ENABLE_URL_TOKENIZER
+constexpr SectionId kVerbatimExactSectionId = 6;
+constexpr SectionId kVerbatimPrefixedSectionId = 7;
+#endif // ENABLE_URL_TOKENIZER
+
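To make the lexicographic rule above concrete, this standalone snippet (illustrative, not part of the patch) sorts the indexable property paths of the test schema and recovers exactly the section ids listed for the !ENABLE_URL_TOKENIZER branch:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Indexable property paths from the test schema (URL properties omitted).
  std::vector<std::string> paths = {
      "exact",  "indexableInteger",  "prefixed",      "repeated",
      "rfc822", "submessage.nested", "verbatimExact", "verbatimPrefixed"};
  std::sort(paths.begin(), paths.end());
  // Prints exact=0, indexableInteger=1, prefixed=2, repeated=3, rfc822=4,
  // submessage.nested=5, verbatimExact=6, verbatimPrefixed=7, matching the
  // k*SectionId constants above.
  for (std::size_t i = 0; i < paths.size(); ++i) {
    std::cout << paths[i] << "=" << i << "\n";
  }
  return 0;
}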
+// Other non-indexable properties.
constexpr std::string_view kUnindexedProperty1 = "unindexed1";
constexpr std::string_view kUnindexedProperty2 = "unindexed2";
-constexpr std::string_view kSubProperty = "submessage";
-constexpr std::string_view kNestedProperty = "nested";
-constexpr std::string_view kRepeatedProperty = "repeated";
constexpr DocumentId kDocumentId0 = 0;
constexpr DocumentId kDocumentId1 = 1;
-constexpr SectionId kExactSectionId = 0;
-constexpr SectionId kPrefixedSectionId = 1;
-constexpr SectionId kRepeatedSectionId = 2;
-constexpr SectionId kNestedSectionId = 3;
-
using Cardinality = PropertyConfigProto::Cardinality;
using DataType = PropertyConfigProto::DataType;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::SizeIs;
using ::testing::Test;
+#ifdef ENABLE_URL_TOKENIZER
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_URL =
+ StringIndexingConfig::TokenizerType::URL;
+#endif // ENABLE_URL_TOKENIZER
+
class IndexProcessorTest : public Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
-
- index_dir_ = GetTestTempDir() + "/index_test/";
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_,
- Index::Create(options, &icing_filesystem_));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/index_processor_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ integer_index_dir_ = base_dir_ + "/integer_index";
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
+
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
- ICING_ASSERT_OK_AND_ASSIGN(lang_segmenter_,
- language_segmenter_factory::Create());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ integer_index_,
+ IntegerIndex::Create(
+ filesystem_, integer_index_dir_,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdJoinIndexImplV1::Create(
+ filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
ICING_ASSERT_OK_AND_ASSIGN(
normalizer_,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
- SchemaProto schema;
- CreateFakeTypeConfig(schema.add_types());
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
-
- IndexProcessor::Options processor_options;
- processor_options.max_tokens_per_document = 1000;
- processor_options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kExactProperty)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPrefixedProperty)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kUnindexedProperty1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kUnindexedProperty2)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kRepeatedProperty)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kVerbatimExactProperty)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kVerbatimPrefixedProperty)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kRfc822Property)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_REPEATED))
+#ifdef ENABLE_URL_TOKENIZER
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kUrlExactProperty)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_URL)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kUrlPrefixedProperty)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_URL)
+ .SetCardinality(CARDINALITY_REPEATED))
+#endif // ENABLE_URL_TOKENIZER
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kIndexableIntegerProperty)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kSubProperty)
+ .SetDataTypeDocument(
+ kNestedType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kNestedProperty)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(),
- processor_options));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock_, integer_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
+ mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>();
}
void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
- }
-
- std::unique_ptr<IndexProcessor> index_processor_;
- std::unique_ptr<LanguageSegmenter> lang_segmenter_;
- std::unique_ptr<Normalizer> normalizer_;
- std::unique_ptr<Index> index_;
- std::unique_ptr<SchemaStore> schema_store_;
-
- private:
- static void AddProperty(std::string_view name, DataType::Code type,
- Cardinality::Code cardinality,
- TermMatchType::Code term_match_type,
- SchemaTypeConfigProto* type_config) {
- auto* prop = type_config->add_properties();
- prop->set_property_name(std::string(name));
- prop->set_data_type(type);
- prop->set_cardinality(cardinality);
- prop->mutable_indexing_config()->set_term_match_type(term_match_type);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ index_processor_.reset();
+ doc_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+ integer_index_.reset();
+ index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
}
- static void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
- type_config->set_schema_type(std::string(kFakeType));
-
- AddProperty(std::string(kExactProperty), DataType::STRING,
- Cardinality::REQUIRED, TermMatchType::EXACT_ONLY, type_config);
-
- AddProperty(std::string(kPrefixedProperty), DataType::STRING,
- Cardinality::OPTIONAL, TermMatchType::PREFIX, type_config);
-
- // Don't set IndexingConfig
- auto* prop = type_config->add_properties();
- prop->set_property_name(std::string(kUnindexedProperty1));
- prop->set_data_type(DataType::STRING);
- prop->set_cardinality(Cardinality::OPTIONAL);
-
- AddProperty(std::string(kUnindexedProperty2), DataType::BYTES,
- Cardinality::OPTIONAL, TermMatchType::UNKNOWN, type_config);
-
- AddProperty(std::string(kRepeatedProperty), DataType::STRING,
- Cardinality::REPEATED, TermMatchType::PREFIX, type_config);
-
- AddProperty(kSubProperty, DataType::DOCUMENT, Cardinality::OPTIONAL,
- TermMatchType::UNKNOWN, type_config);
-
- std::string recipients_name =
- absl_ports::StrCat(kSubProperty, kPropertySeparator, kNestedProperty);
- AddProperty(recipients_name, DataType::STRING, Cardinality::OPTIONAL,
- TermMatchType::PREFIX, type_config);
- }
+ std::unique_ptr<IcingMockFilesystem> mock_icing_filesystem_;
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
std::string index_dir_;
+ std::string integer_index_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+
+ std::unique_ptr<Index> index_;
+ std::unique_ptr<NumericIndex<int64_t>> integer_index_;
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+
+ std::unique_ptr<IndexProcessor> index_processor_;
};
std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
@@ -185,31 +364,18 @@ std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
return infos;
}
-TEST_F(IndexProcessorTest, CreationWithNullPointerShouldFail) {
- IndexProcessor::Options processor_options;
- processor_options.max_tokens_per_document = 1000;
- processor_options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kReturnError;
-
- EXPECT_THAT(IndexProcessor::Create(/*schema_store=*/nullptr,
- lang_segmenter_.get(), normalizer_.get(),
- index_.get(), processor_options),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- EXPECT_THAT(IndexProcessor::Create(
- schema_store_.get(), /*lang_segmenter=*/nullptr,
- normalizer_.get(), index_.get(), processor_options),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- EXPECT_THAT(IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- /*normalizer=*/nullptr, index_.get(),
- processor_options),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- EXPECT_THAT(IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), /*index=*/nullptr,
- processor_options),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
+ std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfoTermFrequencyPair> infos;
+ while (iterator->Advance().ok()) {
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ for (const TermMatchInfo& term_match_info : matched_terms_stats) {
+ infos.push_back(DocHitInfoTermFrequencyPair(
+ iterator->doc_hit_info(), term_match_info.term_frequencies));
+ }
+ }
+ return infos;
}
TEST_F(IndexProcessorTest, NoTermMatchTypeContent) {
@@ -221,8 +387,29 @@ TEST_F(IndexProcessorTest, NoTermMatchTypeContent) {
.AddBytesProperty(std::string(kUnindexedProperty2),
"attachment bytes")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
- EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexProcessorTest, NoValidContent) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "?...!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexProcessorTest, OneDoc) {
@@ -232,19 +419,30 @@ TEST_F(IndexProcessorTest, OneDoc) {
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "hello world")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("hello", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("hello", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expectedMap)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("hello", 1U << kPrefixedSectionId,
- TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator(
+ "hello", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
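The second lookup above restricts the iterator to one section with a bitmask. A small sketch of the masking convention these tests rely on (the exact mask and id types are assumptions; only the `1U << section_id` pattern comes from the test itself):

#include <cstdint>

// Assumed shape: one bit per section id, with kSectionIdMaskAll covering all.
using SectionId = int8_t;
using SectionIdMask = int64_t;

constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
constexpr SectionId kPrefixedSectionId = 2;

// Bit k of the mask selects section k; this mask matches only "prefixed".
constexpr SectionIdMask kOnlyPrefixed = SectionIdMask{1} << kPrefixedSectionId;

static_assert((kSectionIdMaskAll & kOnlyPrefixed) == kOnlyPrefixed,
              "the all-sections mask covers every single-section mask");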
@@ -256,35 +454,69 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
.AddStringProperty(std::string(kExactProperty), "hello world")
.AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+ std::string coffeeRepeatedString = "coffee";
+ for (int i = 0; i < Hit::kMaxTermFrequency + 1; i++) {
+ coffeeRepeatedString += " coffee";
+ }
+
document =
DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema(std::string(kFakeType))
- .AddStringProperty(std::string(kExactProperty), "pitbull")
- .AddStringProperty(std::string(kPrefixedProperty), "mr. world wide")
+ .AddStringProperty(std::string(kExactProperty), coffeeRepeatedString)
+ .AddStringProperty(std::string(kPrefixedProperty),
+ "mr. world world wide")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("world", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("world", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap1{
+ {kPrefixedSectionId, 2}};
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap2{
+ {kExactSectionId, 1}};
EXPECT_THAT(
- GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(kDocumentId1,
- std::vector<SectionId>{kPrefixedSectionId}),
- EqualsDocHitInfo(kDocumentId0,
- std::vector<SectionId>{kExactSectionId})));
+ hits, ElementsAre(
+ EqualsDocHitInfoWithTermFrequency(kDocumentId1, expectedMap1),
+ EqualsDocHitInfoWithTermFrequency(kDocumentId0, expectedMap2)));
ICING_ASSERT_OK_AND_ASSIGN(
- itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
+ itr, index_->GetIterator(
+ "world", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kPrefixedSectionId, 2}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId1, expectedMap)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("coffee", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ expectedMap = {{kExactSectionId, Hit::kMaxTermFrequency}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId1, expectedMap)));
}
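The "coffee" half of this test writes Hit::kMaxTermFrequency + 1 occurrences into one section and then expects the stored frequency to equal Hit::kMaxTermFrequency, i.e. per-section term frequencies saturate instead of overflowing. A tiny sketch of that saturation rule (the concrete type and ceiling are assumptions inferred from the test, not taken from the index code):

#include <algorithm>
#include <cstdint>

// Assumed: term frequencies live in a narrow unsigned type and clamp at a
// fixed ceiling; the test only guarantees the clamping behavior itself.
using TermFrequency = uint8_t;
constexpr TermFrequency kMaxTermFrequency = 255;

TermFrequency ClampTermFrequency(int occurrences_in_section) {
  return static_cast<TermFrequency>(
      std::min(occurrences_in_section, static_cast<int>(kMaxTermFrequency)));
}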
TEST_F(IndexProcessorTest, DocWithNestedProperty) {
@@ -296,16 +528,25 @@ TEST_F(IndexProcessorTest, DocWithNestedProperty) {
.AddDocumentProperty(
std::string(kSubProperty),
DocumentBuilder()
+ .SetKey("icing", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
.AddStringProperty(std::string(kNestedProperty),
"rocky raccoon")
.Build())
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("rocky", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("rocky", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kNestedSectionId})));
@@ -320,108 +561,92 @@ TEST_F(IndexProcessorTest, DocWithRepeatedProperty) {
.AddStringProperty(std::string(kRepeatedProperty), "rocky",
"italian stallion")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("italian", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("italian", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kRepeatedSectionId})));
}
-TEST_F(IndexProcessorTest, TooManyTokensReturnError) {
- // Only allow the first four tokens ("hello", "world", "good", "night") to be
- // indexed.
- IndexProcessor::Options options;
- options.max_tokens_per_document = 4;
- options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+// TODO(b/196771754) This test is disabled on Android because it takes too long
+// to generate all of the unique terms and the test times out. Try storing these
+// unique terms in a file that the test can read from.
+#ifndef __ANDROID__
- ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(), options));
+TEST_F(IndexProcessorTest, HitBufferExhaustedTest) {
+ // Testing has shown that adding ~600,000 hits will fill up the hit buffer.
+  std::vector<std::string> unique_terms = GenerateUniqueTerms(200000);
+  std::string content = absl_ports::StrJoin(unique_terms, " ");
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
- .AddStringProperty(std::string(kExactProperty), "hello world")
- .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
+ .AddStringProperty(std::string(kExactProperty), content)
+ .AddStringProperty(std::string(kPrefixedProperty), content)
+ .AddStringProperty(std::string(kRepeatedProperty), content)
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED,
+ testing::HasSubstr("Hit buffer is full!")));
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
-
- // "night" should have been indexed.
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("night", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
-
- // "moon" should not have been.
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("moon", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
-TEST_F(IndexProcessorTest, TooManyTokensSuppressError) {
- // Only allow the first four tokens ("hello", "world", "good", "night") to be
- // indexed.
- IndexProcessor::Options options;
- options.max_tokens_per_document = 4;
- options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kSuppressError;
-
- ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(), options));
+TEST_F(IndexProcessorTest, LexiconExhaustedTest) {
+ // Testing has shown that adding ~300,000 terms generated this way will
+ // fill up the lexicon.
+  std::vector<std::string> unique_terms = GenerateUniqueTerms(300000);
+  std::string content = absl_ports::StrJoin(unique_terms, " ");
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
- .AddStringProperty(std::string(kExactProperty), "hello world")
- .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
+ .AddStringProperty(std::string(kExactProperty), content)
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
-
- // "night" should have been indexed.
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("night", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
-
- // "moon" should not have been.
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("moon", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
+#endif // __ANDROID__
+
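Both exhaustion tests above lean on GenerateUniqueTerms(), presumably from icing/testing/random-string.h (included at the top of this file). For illustration, a self-contained equivalent that mints n distinct lowercase terms; this sketches one possible scheme and is not the real helper:

#include <algorithm>
#include <string>
#include <vector>

// Bijective base-26 enumeration: a, b, ..., z, aa, ab, ... No two generated
// terms collide, which is all the exhaustion tests need.
std::vector<std::string> MakeUniqueTerms(int n) {
  std::vector<std::string> terms;
  terms.reserve(n);
  for (int i = 0; i < n; ++i) {
    std::string term;
    int v = i;
    while (true) {
      term.push_back(static_cast<char>('a' + v % 26));
      if (v < 26) break;
      v = v / 26 - 1;
    }
    std::reverse(term.begin(), term.end());
    terms.push_back(std::move(term));
  }
  return terms;
}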
TEST_F(IndexProcessorTest, TooLongTokens) {
  // Only allow tokens of length four, truncating "hello", "world", and
  // "night".
- IndexProcessor::Options options;
- options.max_tokens_per_document = 1000;
-
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
/*max_term_byte_size=*/4));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer.get(), index_.get(), options));
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
DocumentProto document =
DocumentBuilder()
@@ -430,27 +655,36 @@ TEST_F(IndexProcessorTest, TooLongTokens) {
.AddStringProperty(std::string(kExactProperty), "hello world")
.AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
// "good" should have been indexed normally.
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("good", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("good", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
// "night" should not have been.
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("night", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("night", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
// "night" should have been truncated to "nigh".
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("nigh", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("nigh", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
@@ -463,7 +697,12 @@ TEST_F(IndexProcessorTest, NonPrefixedContentPrefixQuery) {
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "best rocky movies")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
document =
@@ -472,13 +711,20 @@ TEST_F(IndexProcessorTest, NonPrefixedContentPrefixQuery) {
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kPrefixedProperty), "rocky raccoon")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
  // Only document_id 1 should surface in a prefix query for "Rock".
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("rock", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("rock", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
@@ -491,7 +737,12 @@ TEST_F(IndexProcessorTest, TokenNormalization) {
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
document =
@@ -500,12 +751,19 @@ TEST_F(IndexProcessorTest, TokenNormalization) {
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "all lower case")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("case", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("case", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(kDocumentId1,
@@ -520,29 +778,138 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 123)
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
- // Indexing a document with document_id < last_added_document_id should cause
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t index_element_size,
+ index_->GetElementsSize());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 integer_index_crc,
+ integer_index_->UpdateChecksums());
+
+ // Indexing a document with document_id <= last_added_document_id should cause
// a failure.
document =
DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema(std::string(kFakeType))
.AddStringProperty(std::string(kExactProperty), "all lower case")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 456)
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
  // As should indexing a document with document_id == last_added_document_id.
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
+}
+TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock_, integer_index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_join_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(std::move(qualified_id_join_indexing_handler));
+
+ IndexProcessor index_processor(std::move(handlers), &fake_clock_,
+ /*recovery_mode=*/true);
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t index_element_size,
+ index_->GetElementsSize());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 integer_index_crc,
+ integer_index_->UpdateChecksums());
+
+ // Indexing a document with document_id <= last_added_document_id in recovery
+  // mode should not return an error, but IndexProcessor should still ignore it
+ // and index data should remain unchanged.
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "all lower case")
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
+
+  // As should indexing a document with document_id == last_added_document_id.
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1),
+ IsOk());
+ // Verify that both index_ and integer_index_ are unchanged.
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(index_element_size));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kDocumentId1));
+ EXPECT_THAT(integer_index_->UpdateChecksums(),
+ IsOkAndHolds(integer_index_crc));
}
TEST_F(IndexProcessorTest, NonAsciiIndexing) {
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_SIMPLIFIED_CHINESE);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -550,12 +917,19 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) {
.AddStringProperty(std::string(kExactProperty),
"你好,世界!你好:世界。“你好”世界?")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("你好", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("你好", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kExactSectionId})));
@@ -563,24 +937,13 @@ TEST_F(IndexProcessorTest, NonAsciiIndexing) {
TEST_F(IndexProcessorTest,
LexiconFullIndexesSmallerTokensReturnsResourceExhausted) {
- IndexProcessor::Options processor_options;
- processor_options.max_tokens_per_document = 1000;
- processor_options.token_limit_behavior =
- IndexProcessor::Options::TokenLimitBehavior::kReturnError;
-
- ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(),
- processor_options));
-
// This is the maximum token length that an empty lexicon constructed for a
// lite index with merge size of 1MiB can support.
constexpr int kMaxTokenLength = 16777217;
// Create a string "ppppppp..." with a length that is too large to fit into
// the lexicon.
std::string enormous_string(kMaxTokenLength + 1, 'p');
- DocumentProto document =
+ DocumentProto document_one =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
.SetSchema(std::string(kFakeType))
@@ -588,23 +951,656 @@ TEST_F(IndexProcessorTest,
absl_ports::StrCat(enormous_string, " foo"))
.AddStringProperty(std::string(kPrefixedProperty), "bar baz")
.Build();
- EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document_one));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) {
+ // Create the index with a smaller index_merge_size - merging every time we
+ // add 101 documents. This will result in a small LiteIndex, which will be
+ // easier to fill up. The LiteIndex itself will have a size larger than the
+ // index_merge_size because it adds extra buffer to ensure that it always has
+ // room to fit whatever document will trigger the merge.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), kIpsumText)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/document.ByteSizeLong() * 100,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
+ DocumentId doc_id = 0;
+  // We determined experimentally that indexing 3373 documents with this text
+ // will cause the LiteIndex to fill up. Further indexing will fail unless the
+ // index processor properly merges the LiteIndex into the MainIndex and
+ // empties the LiteIndex.
+ constexpr int kNumDocsLiteIndexExhaustion = 3373;
+ for (; doc_id < kNumDocsLiteIndexExhaustion; ++doc_id) {
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+ }
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+}
+
+TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) {
+ // 1. Setup a mock filesystem to fail to grow the main index.
+ auto open_write_lambda = [this](const char* filename) {
+ std::string main_lexicon_suffix =
+ "/main-lexicon.prop." +
+ std::to_string(GetHasHitsInPrefixSectionPropertyId());
+ std::string filename_string(filename);
+ if (filename_string.length() >= main_lexicon_suffix.length() &&
+ filename_string.substr(
+ filename_string.length() - main_lexicon_suffix.length(),
+ main_lexicon_suffix.length()) == main_lexicon_suffix) {
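+      // Returning an invalid file descriptor simulates an I/O failure when
+      // the main lexicon's property file is opened for writing.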
+ return -1;
+ }
+ return this->filesystem_.OpenForWrite(filename);
+ };
+ ON_CALL(*mock_icing_filesystem_, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPrefixedProperty), kIpsumText)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ // 2. Recreate the index with the mock filesystem and a merge size that will
+ // only allow one document to be added before requiring a merge.
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/document.ByteSizeLong(),
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_,
+ Index::Create(options, &filesystem_, mock_icing_filesystem_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> term_indexing_handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index_.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(term_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
+ // 3. Index one document. This should fit in the LiteIndex without requiring a
+ // merge.
+ DocumentId doc_id = 0;
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+
+ // 4. Add one more document to trigger a merge, which should fail and result
+ // in a Reset.
+ ++doc_id;
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+ StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // 5. Indexing a new document should succeed.
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+}
+
+TEST_F(IndexProcessorTest, ExactVerbatimProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kVerbatimExactProperty),
+ "Hello, world!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(1));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+ index_->GetIterator("Hello, world!", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kVerbatimExactSectionId, 1}};
+
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expectedMap)));
+}
+
+TEST_F(IndexProcessorTest, PrefixVerbatimProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kVerbatimPrefixedProperty),
+ "Hello, world!")
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(1));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ // We expect to match the document we indexed as "Hello, w" is a prefix
+ // of "Hello, world!"
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("Hello, w", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kVerbatimPrefixedSectionId, 1}};
+
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expectedMap)));
+}
+
+TEST_F(IndexProcessorTest, VerbatimPropertyDoesntMatchSubToken) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kVerbatimPrefixedProperty),
+ "Hello, world!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(1));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("world", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+
+ // We should not have hits for term "world" as the index processor should
+  // create a sole token "Hello, world!" for the document.
+ EXPECT_THAT(hits, IsEmpty());
+}
+
+// Some phrases that should exactly match RFC822 tokens. We normalize the
+// tokens, so the case of the string property shouldn't matter.
+TEST_F(IndexProcessorTest, Rfc822PropertyExact) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kRfc822Property),
+ "<AlexSav@GOOGLE.com>")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(7));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kRfc822SectionId, 2}};
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsav", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ expected_map = {{kRfc822SectionId, 1}};
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("alexsav@google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+}
+
+TEST_F(IndexProcessorTest, Rfc822PropertyExactShouldNotReturnPrefix) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kRfc822Property),
+ "<AlexSav@GOOGLE.com>")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(7));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kRfc822SectionId, 2}};
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsa", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+}
+
+// Some prefixes of generated RFC822 tokens.
+#ifdef ENABLE_RFC822_PROPERTY_PREFIX_TEST
+// ENABLE_RFC822_PROPERTY_PREFIX_TEST won't be defined, so this test will not be
+// compiled.
+// TODO(b/250648165): Remove #ifdef to enable this test after fixing the
+//                    nondeterministic behavior of prefix query term frequency in
+// lite index.
+//
+TEST_F(IndexProcessorTest, Rfc822PropertyPrefix) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kRfc822Property),
+ "<alexsav@google.com>")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(7));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kRfc822SectionId, 1}};
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("alexsav@", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("goog", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("ale", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+}
+#endif // ENABLE_RFC822_PROPERTY_PREFIX_TEST
+
+TEST_F(IndexProcessorTest, Rfc822PropertyNoMatch) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kRfc822Property),
+ "<alexsav@google.com>")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(7));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("abc.xyz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+
+ EXPECT_THAT(hits, IsEmpty());
+}
+
+#ifdef ENABLE_URL_TOKENIZER
+TEST_F(IndexProcessorTest, ExactUrlProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUrlExactProperty),
+ "http://www.google.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(7));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("google", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kUrlExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ expected_map = {{kUrlExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("www.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ expected_map = {{kUrlExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http://www.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ expected_map = {{kUrlExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+}
+
+TEST_F(IndexProcessorTest, ExactUrlPropertyDoesNotMatchPrefix) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUrlExactProperty),
+ "https://mail.google.com/calendar/render")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(8));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("co", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("mail.go", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("mail.google.com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+}
+
+TEST_F(IndexProcessorTest, PrefixUrlProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUrlPrefixedProperty),
+ "http://www.google.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(7));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ // "goo" is a prefix of "google" and "google.com"
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("goo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kUrlPrefixedSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ // "http" is a prefix of "http" and "http://www.google.com"
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("http", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ expected_map = {{kUrlPrefixedSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+
+ // "www.go" is a prefix of "www.google.com"
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("www.go", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ expected_map = {{kUrlPrefixedSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expected_map)));
+}
+
+TEST_F(IndexProcessorTest, PrefixUrlPropertyNoMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUrlPrefixedProperty),
+ "https://mail.google.com/calendar/render")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(8));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+  // No token starts with "gle", so we should have no hits.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("gle", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("w.goo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+
+  // Tokens have separators removed, so no hits here.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator(".com", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("calendar/render", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ hits = GetHitsWithTermFrequency(std::move(itr));
+ EXPECT_THAT(hits, IsEmpty());
+}
+#endif // ENABLE_URL_TOKENIZER
+
+TEST_F(IndexProcessorTest, IndexableIntegerProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 1, 2, 3, 4,
+ 5)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ // Expected to have 1 integer section.
+ EXPECT_THAT(tokenized_document.integer_sections(), SizeIs(1));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(kIndexableIntegerProperty, /*key_lower=*/1,
+ /*key_upper=*/5, *doc_store_, *schema_store_,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kIndexableIntegerSectionId})));
+}
+
+TEST_F(IndexProcessorTest, IndexableIntegerPropertyNoMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddInt64Property(std::string(kIndexableIntegerProperty), 1, 2, 3, 4,
+ 5)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ // Expected to have 1 integer section.
+ EXPECT_THAT(tokenized_document.integer_sections(), SizeIs(1));
+
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(kIndexableIntegerProperty, /*key_lower=*/-1,
+ /*key_upper=*/0, *doc_store_, *schema_store_,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
} // namespace
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 927acaf..98058be 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -14,26 +14,38 @@
#include "icing/index/index.h"
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
#include "icing/index/hit/hit.h"
-#include "icing/index/iterator/doc-hit-info-iterator-term.h"
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/lite-index.h"
+#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/main/doc-hit-info-iterator-term-main.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
-#include "icing/index/term-property-id.h"
+#include "icing/index/term-metadata.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
+#include "icing/scoring/ranker.h"
+#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -53,35 +65,92 @@ libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions(
"Requested hit buffer size %d is too large.",
options.index_merge_size));
}
- return LiteIndex::Options(options.base_dir + "/idx/lite.",
- options.index_merge_size);
+ return LiteIndex::Options(
+ options.base_dir + "/idx/lite.", options.index_merge_size,
+ options.lite_index_sort_at_indexing, options.lite_index_sort_size,
+ options.include_property_existence_metadata_hits);
+}
+
+std::string MakeMainIndexFilepath(const std::string& base_dir) {
+ return base_dir + "/idx/main";
}
-// TODO(tjbarron) implement for real when the main index is added.
IcingDynamicTrie::Options GetMainLexiconOptions() {
+  // The default values for IcingDynamicTrie::Options are fine for the main
+ // lexicon.
return IcingDynamicTrie::Options();
}
-// Helper function to check if a term is in the given namespaces.
-// TODO(samzheng): Implement a method PropertyReadersAll.HasAnyProperty().
-bool IsTermInNamespaces(
- const IcingDynamicTrie::PropertyReadersAll& property_reader,
- uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) {
- for (NamespaceId namespace_id : namespace_ids) {
- if (property_reader.HasProperty(GetNamespacePropertyId(namespace_id),
- value_index)) {
- return true;
+enum class MergeAction { kTakeLiteTerm, kTakeMainTerm, kMergeTerms };
+
+// Merges the TermMetadata from the lite index and the main index. If a term
+// exists in both indices, its hit counts are summed before being pushed onto
+// the term heap. The heap is a min-heap, which lets us skip some pushes; the
+// time complexity is O(N lg K), where N is the total number of terms and K is
+// num_to_return.
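+// Example (assuming both input lists are sorted by term content, as the merge
+// below requires): lite = {("foo", 2)}, main = {("bar", 1), ("foo", 3)} with
+// num_to_return = 2 yields {("foo", 5), ("bar", 1)}.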
+std::vector<TermMetadata> MergeAndRankTermMetadatas(
+ std::vector<TermMetadata> lite_term_metadata_list,
+ std::vector<TermMetadata> main_term_metadata_list, int num_to_return) {
+ std::vector<TermMetadata> merged_term_metadata_heap;
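+  // The heap never holds more than num_to_return entries, so cap the
+  // reservation at that size.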
+ merged_term_metadata_heap.reserve(
+ std::min(lite_term_metadata_list.size() + main_term_metadata_list.size(),
+ static_cast<size_t>(num_to_return)));
+
+ auto lite_term_itr = lite_term_metadata_list.begin();
+ auto main_term_itr = main_term_metadata_list.begin();
+ MergeAction merge_action;
+ while (lite_term_itr != lite_term_metadata_list.end() ||
+ main_term_itr != main_term_metadata_list.end()) {
+    // Look at the next metadata in each list, if available, to decide how to
+    // merge.
+ if (main_term_itr == main_term_metadata_list.end()) {
+ merge_action = MergeAction::kTakeLiteTerm;
+ } else if (lite_term_itr == lite_term_metadata_list.end()) {
+ merge_action = MergeAction::kTakeMainTerm;
+ } else if (lite_term_itr->content < main_term_itr->content) {
+ merge_action = MergeAction::kTakeLiteTerm;
+ } else if (main_term_itr->content < lite_term_itr->content) {
+ merge_action = MergeAction::kTakeMainTerm;
+ } else {
+ // The next metadatas refer to the same term. Combine them.
+ merge_action = MergeAction::kMergeTerms;
+ }
+ switch (merge_action) {
+ case MergeAction::kTakeLiteTerm:
+ PushToTermHeap(std::move(*lite_term_itr), num_to_return,
+ merged_term_metadata_heap);
+ ++lite_term_itr;
+ break;
+ case MergeAction::kTakeMainTerm:
+ PushToTermHeap(std::move(*main_term_itr), num_to_return,
+ merged_term_metadata_heap);
+ ++main_term_itr;
+ break;
+ case MergeAction::kMergeTerms:
+ int total_est_hit_count = lite_term_itr->score + main_term_itr->score;
+ PushToTermHeap(TermMetadata(std::move(lite_term_itr->content),
+ total_est_hit_count),
+ num_to_return, merged_term_metadata_heap);
+ ++lite_term_itr;
+ ++main_term_itr;
+ break;
}
}
-
- return false;
+  // Reverse the list since terms are popped from the min-heap in increasing
+  // order but must be returned in decreasing order.
+ std::vector<TermMetadata> merged_term_metadata_list =
+ PopAllTermsFromHeap(merged_term_metadata_heap);
+ std::reverse(merged_term_metadata_list.begin(),
+ merged_term_metadata_list.end());
+ return merged_term_metadata_list;
}
} // namespace
libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
- const Options& options, const IcingFilesystem* filesystem) {
+ const Options& options, const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(icing_filesystem);
ICING_ASSIGN_OR_RETURN(LiteIndex::Options lite_index_options,
CreateLiteIndexOptions(options));
@@ -91,81 +160,174 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
IcingDynamicTrie::max_value_index(GetMainLexiconOptions()),
IcingDynamicTrie::max_value_index(
lite_index_options.lexicon_options)));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<LiteIndex> lite_index,
- LiteIndex::Create(lite_index_options, filesystem));
- return std::unique_ptr<Index>(
- new Index(options, std::move(term_id_codec), std::move(lite_index)));
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(lite_index_options, icing_filesystem));
+ // Sort the lite index if we've enabled sorting the HitBuffer at indexing
+ // time, and there's an unsorted tail exceeding the threshold.
+ if (options.lite_index_sort_at_indexing &&
+ lite_index->HasUnsortedHitsExceedingSortThreshold()) {
+ lite_index->SortHits();
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(MakeMainIndexFilepath(options.base_dir), filesystem,
+ icing_filesystem));
+ return std::unique_ptr<Index>(new Index(options, std::move(term_id_codec),
+ std::move(lite_index),
+ std::move(main_index), filesystem));
+}
+
+/* static */ libtextclassifier3::StatusOr<int> Index::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ return MainIndex::ReadFlashIndexMagic(filesystem,
+ MakeMainIndexFilepath(base_dir));
+}
+
+libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
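+  // Each sub-index can only be truncated by resetting it entirely, so reset
+  // whichever one contains hits for document ids beyond document_id.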
+ if (lite_index_->last_added_document_id() != kInvalidDocumentId &&
+ lite_index_->last_added_document_id() > document_id) {
+ ICING_VLOG(1) << "Clipping to " << document_id
+ << ". Throwing out lite index which is at "
+ << lite_index_->last_added_document_id();
+ ICING_RETURN_IF_ERROR(lite_index_->Reset());
+ }
+ if (main_index_->last_added_document_id() != kInvalidDocumentId &&
+ main_index_->last_added_document_id() > document_id) {
+ ICING_VLOG(1) << "Clipping to " << document_id
+ << ". Throwing out lite index which is at "
+ << main_index_->last_added_document_id();
+ ICING_RETURN_IF_ERROR(main_index_->Reset());
+ }
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
- TermMatchType::Code term_match_type) {
+Index::GetIterator(const std::string& term, int term_start_index,
+ int unnormalized_term_length, SectionIdMask section_id_mask,
+ TermMatchType::Code term_match_type,
+ bool need_hit_term_frequency) {
+ std::unique_ptr<DocHitInfoIterator> lite_itr;
+ std::unique_ptr<DocHitInfoIterator> main_itr;
switch (term_match_type) {
case TermMatchType::EXACT_ONLY:
- return std::make_unique<DocHitInfoIteratorTermExact>(
- term_id_codec_.get(), lite_index_.get(), term, section_id_mask);
+ lite_itr = std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ term_id_codec_.get(), lite_index_.get(), term, term_start_index,
+ unnormalized_term_length, section_id_mask, need_hit_term_frequency);
+ main_itr = std::make_unique<DocHitInfoIteratorTermMainExact>(
+ main_index_.get(), term, term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency);
+ break;
case TermMatchType::PREFIX:
- return std::make_unique<DocHitInfoIteratorTermPrefix>(
- term_id_codec_.get(), lite_index_.get(), term, section_id_mask);
+ lite_itr = std::make_unique<DocHitInfoIteratorTermLitePrefix>(
+ term_id_codec_.get(), lite_index_.get(), term, term_start_index,
+ unnormalized_term_length, section_id_mask, need_hit_term_frequency);
+ main_itr = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
+ main_index_.get(), term, term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency);
+ break;
default:
return absl_ports::InvalidArgumentError(
absl_ports::StrCat("Invalid TermMatchType: ",
TermMatchType::Code_Name(term_match_type)));
}
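+  // A term may have hits in both the lite and the main index, so return an OR
+  // over both iterators.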
+ return std::make_unique<DocHitInfoIteratorOr>(std::move(lite_itr),
+ std::move(main_itr));
}
libtextclassifier3::StatusOr<std::vector<TermMetadata>>
-Index::FindTermsByPrefix(const std::string& prefix,
- const std::vector<NamespaceId>& namespace_ids,
- int num_to_return) {
- std::vector<TermMetadata> term_metadata_list;
- if (num_to_return <= 0) {
- return term_metadata_list;
- }
-
+Index::FindLiteTermsByPrefix(
+ const std::string& prefix,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker) {
// Finds all the terms that start with the given prefix in the lexicon.
IcingDynamicTrie::Iterator term_iterator(lite_index_->lexicon(),
prefix.c_str());
- // A property reader to help check if a term has some property.
- IcingDynamicTrie::PropertyReadersAll property_reader(lite_index_->lexicon());
-
- while (term_iterator.IsValid() && term_metadata_list.size() < num_to_return) {
+ std::vector<TermMetadata> term_metadata_list;
+ while (term_iterator.IsValid()) {
uint32_t term_value_index = term_iterator.GetValueIndex();
- // Skips the terms that don't exist in the given namespaces. We won't skip
- // any terms if namespace_ids is empty.
- if (!namespace_ids.empty() &&
- !IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) {
- term_iterator.Advance();
- continue;
- }
-
ICING_ASSIGN_OR_RETURN(
uint32_t term_id,
term_id_codec_->EncodeTvi(term_value_index, TviType::LITE),
absl_ports::InternalError("Failed to access terms in lexicon."));
-
- term_metadata_list.emplace_back(term_iterator.GetKey(),
- lite_index_->CountHits(term_id));
+ ICING_ASSIGN_OR_RETURN(
+ int hit_score,
+ lite_index_->ScoreHits(term_id, score_by, suggestion_result_checker));
+ if (hit_score > 0) {
+      // At least one document in the given namespaces has this term.
+ term_metadata_list.push_back(
+ TermMetadata(term_iterator.GetKey(), hit_score));
+ }
term_iterator.Advance();
}
-
return term_metadata_list;
}
-libtextclassifier3::Status Index::Editor::AddHit(const char* term,
- Hit::Score score) {
+libtextclassifier3::StatusOr<std::vector<TermMetadata>>
+Index::FindTermsByPrefix(
+ const std::string& prefix, int num_to_return,
+ TermMatchType::Code scoring_match_type,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
+ const SuggestionResultChecker* suggestion_result_checker) {
+ std::vector<TermMetadata> term_metadata_list;
+ if (num_to_return <= 0) {
+ return term_metadata_list;
+ }
+ // Get results from the LiteIndex.
+  // TODO(b/250648165): Support scoring terms by prefix_hit in lite_index.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<TermMetadata> lite_term_metadata_list,
+ FindLiteTermsByPrefix(prefix, rank_by, suggestion_result_checker));
+ // Append results from the MainIndex.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<TermMetadata> main_term_metadata_list,
+ main_index_->FindTermsByPrefix(prefix, scoring_match_type, rank_by,
+ suggestion_result_checker));
+ return MergeAndRankTermMetadatas(std::move(lite_term_metadata_list),
+ std::move(main_term_metadata_list),
+ num_to_return);
+}
+
+IndexStorageInfoProto Index::GetStorageInfo() const {
+ IndexStorageInfoProto storage_info;
+ int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str());
+ storage_info.set_index_size(Filesystem::SanitizeFileSize(directory_size));
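+  // Thread the proto through both sub-indexes so that each fills in its own
+  // fields.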
+ storage_info = lite_index_->GetStorageInfo(std::move(storage_info));
+ return main_index_->GetStorageInfo(std::move(storage_info));
+}
+
+libtextclassifier3::Status Index::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ if (main_index_->last_added_document_id() != kInvalidDocumentId) {
+ ICING_RETURN_IF_ERROR(main_index_->Optimize(document_id_old_to_new));
+ }
+ return lite_index_->Optimize(document_id_old_to_new, term_id_codec_.get(),
+ new_last_added_document_id);
+}
+
+libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
// Step 1: See if this term is already in the lexicon
uint32_t tvi;
- auto tvi_or = lite_index_->FindTerm(term);
+ auto tvi_or = lite_index_->GetTermId(term);
// Step 2: Update the lexicon, either add the term or update its properties
if (tvi_or.ok()) {
+ tvi = tvi_or.ValueOrDie();
+ if (seen_tokens_.find(tvi) != seen_tokens_.end()) {
+ ICING_VLOG(1) << "Updating term frequency for term " << term;
+ if (seen_tokens_[tvi] != Hit::kMaxTermFrequency) {
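+        // Saturate at kMaxTermFrequency instead of overflowing.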
+ ++seen_tokens_[tvi];
+ }
+ return libtextclassifier3::Status::OK;
+ }
ICING_VLOG(1) << "Term " << term
<< " is already present in lexicon. Updating.";
- tvi = tvi_or.ValueOrDie();
// Already in the lexicon. Just update the properties.
ICING_RETURN_IF_ERROR(lite_index_->UpdateTermProperties(
tvi, term_match_type_ == TermMatchType::PREFIX, namespace_id_));
@@ -175,13 +337,20 @@ libtextclassifier3::Status Index::Editor::AddHit(const char* term,
ICING_ASSIGN_OR_RETURN(
tvi, lite_index_->InsertTerm(term, term_match_type_, namespace_id_));
}
+ // Token seen for the first time in the current document.
+ seen_tokens_[tvi] = 1;
+ return libtextclassifier3::Status::OK;
+}
- // Step 3: Add the hit itself
- Hit hit(section_id_, document_id_, score,
- term_match_type_ == TermMatchType::PREFIX);
- ICING_ASSIGN_OR_RETURN(uint32_t term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- return lite_index_->AddHit(term_id, hit);
+libtextclassifier3::Status Index::Editor::IndexAllBufferedTerms() {
+ for (auto itr = seen_tokens_.begin(); itr != seen_tokens_.end(); itr++) {
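+    // Each buffered term becomes one hit whose term frequency is the number
+    // of times the term was buffered for this document's section.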
+ Hit hit(section_id_, document_id_, /*term_frequency=*/itr->second,
+ term_match_type_ == TermMatchType::PREFIX);
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t term_id, term_id_codec_->EncodeTvi(itr->first, TviType::LITE));
+ ICING_RETURN_IF_ERROR(lite_index_->AddHit(term_id, hit));
+ }
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/index/index.h b/icing/index/index.h
index f30c8ad..a5d75c4 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -18,22 +18,31 @@
#include <cstdint>
#include <memory>
#include <string>
-#include <unordered_set>
+#include <unordered_map>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/lite-index.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/main/main-index-merger.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/namespace-id.h"
-#include "icing/util/crc32.h"
+#include "icing/store/suggestion-result-checker.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -61,11 +70,22 @@ namespace lib {
class Index {
public:
struct Options {
- explicit Options(const std::string& base_dir, uint32_t index_merge_size)
- : base_dir(base_dir), index_merge_size(index_merge_size) {}
+ explicit Options(const std::string& base_dir, uint32_t index_merge_size,
+ bool lite_index_sort_at_indexing,
+ uint32_t lite_index_sort_size,
+ bool include_property_existence_metadata_hits = false)
+ : base_dir(base_dir),
+ index_merge_size(index_merge_size),
+ lite_index_sort_at_indexing(lite_index_sort_at_indexing),
+ lite_index_sort_size(lite_index_sort_size),
+ include_property_existence_metadata_hits(
+ include_property_existence_metadata_hits) {}
std::string base_dir;
int32_t index_merge_size;
+ bool lite_index_sort_at_indexing;
+ int32_t lite_index_sort_size;
+ bool include_property_existence_metadata_hits;
};
// Creates an instance of Index in the directory pointed by file_dir.
@@ -76,15 +96,32 @@ class Index {
// INVALID_ARGUMENT if options have invalid values
// INTERNAL on I/O error
static libtextclassifier3::StatusOr<std::unique_ptr<Index>> Create(
- const Options& options, const IcingFilesystem* filesystem);
+ const Options& options, const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem);
+
+ // Reads magic from existing flash (main) index file header. We need this
+ // during Icing initialization phase to determine the version.
+ //
+ // Returns
+ // Valid magic on success
+  //   NOT_FOUND if the flash index doesn't exist
+ // INTERNAL on I/O error
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir);
// Clears all files created by the index. Returns OK if all files were
// cleared.
- libtextclassifier3::Status Reset() { return lite_index_->Reset(); }
+ libtextclassifier3::Status Reset() {
+ ICING_RETURN_IF_ERROR(lite_index_->Reset());
+ return main_index_->Reset();
+ }
// Brings components of the index into memory in anticipation of a query in
// order to reduce latency.
- void Warm() { lite_index_->Warm(); }
+ void Warm() {
+ lite_index_->Warm();
+ main_index_->Warm();
+ }
// Syncs all the data and metadata changes to disk.
//
@@ -92,25 +129,53 @@ class Index {
// OK on success
// INTERNAL on I/O errors
libtextclassifier3::Status PersistToDisk() {
- return lite_index_->PersistToDisk();
+ ICING_RETURN_IF_ERROR(lite_index_->PersistToDisk());
+ return main_index_->PersistToDisk();
}
- // Compute the checksum over the entire Index's subcomponents.
- Crc32 ComputeChecksum() { return lite_index_->ComputeChecksum(); }
+ // Discard parts of the index if they contain data for document ids greater
+ // than document_id.
+ //
+ // NOTE: This means that TruncateTo(kInvalidDocumentId) will have no effect.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL on I/O errors
+ libtextclassifier3::Status TruncateTo(DocumentId document_id);
// DocumentIds are always inserted in increasing order. Returns the largest
// document_id added to the index.
DocumentId last_added_document_id() const {
- return lite_index_->last_added_document_id();
+ DocumentId lite_document_id = lite_index_->last_added_document_id();
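+    // New hits always land in the lite index first, so its value is the most
+    // recent; fall back to the main index only when the lite index is empty.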
+ if (lite_document_id != kInvalidDocumentId) {
+ return lite_document_id;
+ }
+ return main_index_->last_added_document_id();
+ }
+
+  // Sets last_added_document_id to document_id so long as document_id >=
+  // last_added_document_id().
+ void set_last_added_document_id(DocumentId document_id) {
+ DocumentId lite_document_id = lite_index_->last_added_document_id();
+ if (lite_document_id == kInvalidDocumentId ||
+ document_id >= lite_document_id) {
+ lite_index_->set_last_added_document_id(document_id);
+ }
}
// Returns debug information for the index in out.
- // verbosity <= 0, simplest debug information - just the lexicons and lite
- // index.
- // verbosity > 0, more detailed debug information including raw postings
- // lists.
- void GetDebugInfo(int verbosity, std::string* out) const {
- lite_index_->GetDebugInfo(verbosity, out);
+ // verbosity = BASIC, simplest debug information - just the lexicons and lite
+ // index.
+ // verbosity = DETAILED, more detailed debug information including raw
+ // postings lists.
+ IndexDebugInfoProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) const {
+ IndexDebugInfoProto debug_info;
+ *debug_info.mutable_index_storage_info() = GetStorageInfo();
+ *debug_info.mutable_lite_index_info() =
+ lite_index_->GetDebugInfo(verbosity);
+ *debug_info.mutable_main_index_info() =
+ main_index_->GetDebugInfo(verbosity);
+ return debug_info;
}
  // Returns the byte size of all the elements held in the index. This
@@ -121,33 +186,47 @@ class Index {
// Byte size on success
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const {
- return lite_index_->GetElementsSize();
+ ICING_ASSIGN_OR_RETURN(int64_t lite_index_size,
+ lite_index_->GetElementsSize());
+ ICING_ASSIGN_OR_RETURN(int64_t main_index_size,
+ main_index_->GetElementsSize());
+ return lite_index_size + main_index_size;
}
+ // Calculates the StorageInfo for the Index.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo() const;
+
// Create an iterator to iterate through all doc hit infos in the index that
- // match the term. section_id_mask can be set to ignore hits from sections not
- // listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
- // that occur in section 3.
+ // match the term. term_start_index is the start index of the given term in
+ // the search query. unnormalized_term_length is the length of the given
+  // unnormalized term in the search query. section_id_mask can be set to
+  // ignore hits from sections not listed in the mask.
+ // Eg. section_id_mask = 1U << 3; would only return hits that occur in
+ // section 3.
//
// Returns:
// unique ptr to a valid DocHitInfoIterator that matches the term
// INVALID_ARGUMENT if given an invalid term_match_type
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- const std::string& term, SectionIdMask section_id_mask,
- TermMatchType::Code term_match_type);
+ const std::string& term, int term_start_index,
+ int unnormalized_term_length, SectionIdMask section_id_mask,
+ TermMatchType::Code term_match_type, bool need_hit_term_frequency = true);
// Finds terms with the given prefix in the given namespaces. If
- // 'namespace_ids' is empty, returns results from all the namespaces. The
- // input prefix must be normalized, otherwise inaccurate results may be
- // returned. Results are not sorted specifically and are in their original
- // order. Number of results are no more than 'num_to_return'.
+ // 'namespace_ids' is empty, returns results from all the namespaces. Results
+  // are sorted in decreasing order of hit count. The number of results is no
+  // more than 'num_to_return'.
//
// Returns:
// A list of TermMetadata on success
// INTERNAL_ERROR if failed to access term data.
libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix(
- const std::string& prefix, const std::vector<NamespaceId>& namespace_ids,
- int num_to_return);
+ const std::string& prefix, int num_to_return,
+ TermMatchType::Code scoring_match_type,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
+ const SuggestionResultChecker* suggestion_result_checker);
// A class that can be used to add hits to the index.
//
@@ -170,14 +249,16 @@ class Index {
namespace_id_(namespace_id),
section_id_(section_id) {}
- libtextclassifier3::Status AddHit(const char* term,
- Hit::Score score = Hit::kMaxHitScore);
+ // Buffer the term in seen_tokens_.
+ libtextclassifier3::Status BufferTerm(const char* term);
+ // Index all the terms stored in seen_tokens_.
+ libtextclassifier3::Status IndexAllBufferedTerms();
private:
// The Editor is able to store previously seen terms as TermIds. This is
    // more efficient than a client doing this externally because TermIds are
// not exposed to clients.
- std::unordered_set<uint32_t> seen_tokens_;
+ std::unordered_map<uint32_t, Hit::TermFrequency> seen_tokens_;
const TermIdCodec* term_id_codec_;
LiteIndex* lite_index_;
DocumentId document_id_;
@@ -191,16 +272,71 @@ class Index {
section_id, term_match_type, namespace_id);
}
+ bool WantsMerge() const { return lite_index_->WantsMerge(); }
+
+ // Merges newly-added hits in the LiteIndex into the MainIndex.
+ //
+ // RETURNS:
+ // - INTERNAL on IO error while writing to the MainIndex.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index.
+ libtextclassifier3::Status Merge() {
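+    // Merge the lite lexicon into the main lexicon, translate the lite hits
+    // into main term ids, add them to the main index, and finally clear the
+    // lite index.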
+ ICING_ASSIGN_OR_RETURN(MainIndex::LexiconMergeOutputs outputs,
+ main_index_->MergeLexicon(lite_index_->lexicon()));
+ ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(
+ *lite_index_, *term_id_codec_, outputs));
+ ICING_RETURN_IF_ERROR(main_index_->AddHits(
+ *term_id_codec_, std::move(outputs.backfill_map),
+ std::move(term_id_hit_pairs), lite_index_->last_added_document_id()));
+ ICING_RETURN_IF_ERROR(main_index_->PersistToDisk());
+ return lite_index_->Reset();
+ }
+
+ // Whether the LiteIndex HitBuffer requires sorting. This is only true if
+ // Icing has enabled sorting during indexing time, and the HitBuffer's
+ // unsorted tail has exceeded the lite_index_sort_size.
+ bool LiteIndexNeedSort() const {
+ return options_.lite_index_sort_at_indexing &&
+ lite_index_->HasUnsortedHitsExceedingSortThreshold();
+ }
+
+ // Sorts the LiteIndex HitBuffer.
+  void SortLiteIndex() { lite_index_->SortHits(); }
+
+ // Reduces internal file sizes by reclaiming space of deleted documents.
+ // new_last_added_document_id will be used to update the last added document
+ // id in the lite index.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error, this indicates that the index may be in an
+ // invalid state and should be cleared.
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id);
+
private:
Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
- std::unique_ptr<LiteIndex>&& lite_index)
+ std::unique_ptr<LiteIndex> lite_index,
+ std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem)
: lite_index_(std::move(lite_index)),
+ main_index_(std::move(main_index)),
options_(options),
- term_id_codec_(std::move(term_id_codec)) {}
+ term_id_codec_(std::move(term_id_codec)),
+ filesystem_(filesystem) {}
+
+ libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
+ const std::string& prefix,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
+ const SuggestionResultChecker* suggestion_result_checker);
std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<MainIndex> main_index_;
const Options options_;
std::unique_ptr<TermIdCodec> term_id_codec_;
+ const Filesystem* filesystem_;
};
} // namespace lib
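
Taken together, the header changes above replace the old single-shot
AddHit() flow with a buffer-then-flush Editor plus explicit maintenance
hooks. A minimal usage sketch, assuming an already-created Index instance
(identifiers other than the Index API itself are illustrative):

  Index::Editor edit = index->Edit(document_id, section_id,
                                   TermMatchType::EXACT_ONLY,
                                   /*namespace_id=*/0);
  ICING_RETURN_IF_ERROR(edit.BufferTerm("foo"));
  ICING_RETURN_IF_ERROR(edit.BufferTerm("bar"));
  // Hits become visible to queries only after the buffered terms are flushed.
  ICING_RETURN_IF_ERROR(edit.IndexAllBufferedTerms());

  // Periodic maintenance: move newly-added lite-index hits into the main
  // index once the lite index asks for it.
  if (index->WantsMerge()) {
    ICING_RETURN_IF_ERROR(index->Merge());
  }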
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 070e82a..04a6bb7 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -14,12 +14,16 @@
#include "icing/index/index.h"
+#include <unistd.h>
+
+#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -31,65 +35,101 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/testing/always-true-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/random-string.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
namespace icing {
namespace lib {
namespace {
+using ::testing::ContainerEq;
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
using ::testing::IsTrue;
+using ::testing::Ne;
using ::testing::NiceMock;
using ::testing::Not;
+using ::testing::Return;
using ::testing::SizeIs;
+using ::testing::StrEq;
+using ::testing::StrNe;
using ::testing::Test;
using ::testing::UnorderedElementsAre;
+int GetBlockSize() { return getpagesize(); }
+
class IndexTest : public Test {
protected:
void SetUp() override {
index_dir_ = GetTestTempDir() + "/index_test/";
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
}
void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
+ index_.reset();
+ icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
}
- std::unique_ptr<Index> index_;
+ std::vector<DocHitInfo> GetHits(
+ std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocHitInfo>> GetHits(
+ std::string term, int term_start_index, int unnormalized_term_length,
+ TermMatchType::Code match_type) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator(term, term_start_index, unnormalized_term_length,
+ kSectionIdMaskAll, match_type));
+ return GetHits(std::move(itr));
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
std::string index_dir_;
- IcingFilesystem filesystem_;
+ std::unique_ptr<Index> index_;
};
constexpr DocumentId kDocumentId0 = 0;
constexpr DocumentId kDocumentId1 = 1;
constexpr DocumentId kDocumentId2 = 2;
+constexpr DocumentId kDocumentId3 = 3;
+constexpr DocumentId kDocumentId4 = 4;
+constexpr DocumentId kDocumentId5 = 5;
+constexpr DocumentId kDocumentId6 = 6;
+constexpr DocumentId kDocumentId7 = 7;
+constexpr DocumentId kDocumentId8 = 8;
constexpr SectionId kSectionId2 = 2;
constexpr SectionId kSectionId3 = 3;
-std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
- std::vector<DocHitInfo> infos;
- while (iterator->Advance().ok()) {
- infos.push_back(iterator->doc_hit_info());
- }
- return infos;
-}
-
MATCHER_P2(EqualsDocHitInfo, document_id, sections, "") {
const DocHitInfo& actual = arg;
SectionIdMask section_mask = kSectionIdMaskNone;
for (SectionId section : sections) {
- section_mask |= 1U << section;
+ section_mask |= UINT64_C(1) << section;
}
*result_listener << "actual is {document_id=" << actual.document_id()
<< ", section_mask=" << actual.hit_section_ids_mask()
@@ -102,52 +142,111 @@ MATCHER_P2(EqualsDocHitInfo, document_id, sections, "") {
MATCHER_P2(EqualsTermMetadata, content, hit_count, "") {
const TermMetadata& actual = arg;
*result_listener << "actual is {content=" << actual.content
- << ", hit_count=" << actual.hit_count
+ << ", score=" << actual.score
<< "}, but expected was {content=" << content
- << ", hit_count=" << hit_count << "}.";
- return actual.content == content && actual.hit_count == hit_count;
+ << ", score=" << hit_count << "}.";
+ return actual.content == content && actual.score == hit_count;
}
TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- EXPECT_THAT(Index::Create(options, /*filesystem=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ EXPECT_THAT(
+ Index::Create(options, &filesystem_, /*icing_filesystem=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ Index::Create(options, /*filesystem=*/nullptr, &icing_filesystem_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(IndexTest, EmptyIndex) {
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_F(IndexTest, EmptyIndexAfterMerge) {
+  // Merging an empty index should succeed but have no effect.
+ ICING_ASSERT_OK(index_->Merge());
- EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_F(IndexTest, CreationWithLiteIndexSortAtIndexingEnabledShouldSort) {
+ // Make the index with lite_index_sort_at_indexing=false and a very small sort
+ // threshold.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/false,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("baz"), IsOk());
+ ASSERT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Persist and recreate the index with lite_index_sort_at_indexing=true
+ ASSERT_THAT(index_->PersistToDisk(), IsOk());
+ options = Index::Options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/16);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+  // Even though the unsorted HitBuffer exceeded the sort threshold before
+  // recreation, recreating the index with lite_index_sort_at_indexing=true
+  // should have sorted it at initialization, so no sort is pending now.
+ EXPECT_THAT(index_->LiteIndexNeedSort(), IsFalse());
}
TEST_F(IndexTest, AdvancePastEnd) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(itr->doc_hit_info(),
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -155,250 +254,969 @@ TEST_F(IndexTest, AdvancePastEnd) {
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
}
+TEST_F(IndexTest, AdvancePastEndAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(itr->doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(itr->doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
+}
+
+TEST_F(IndexTest, IteratorGetCallStats_mainIndexOnly) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Merge the index.
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Before Advance().
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 1st Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 2nd Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 3rd Advance().
+ ASSERT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+}
+
+TEST_F(IndexTest, IteratorGetCallStats_liteIndexOnly) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Before Advance().
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 1st Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/1,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 2nd Advance().
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+ // 3rd Advance().
+ ASSERT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+}
+
+TEST_F(IndexTest, IteratorGetCallStats) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Merge the index. 2 hits for "foo" will be merged into the main index.
+ ICING_ASSERT_OK(index_->Merge());
+
+  // Insert 2 more hits for "foo". They will go into the lite index.
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Before Advance().
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
+
+  // 1st Advance(). DocHitInfoIteratorOr advances both the left and right
+  // iterators (i.e. the lite and main index iterators) once, compares
+  // document ids, and returns the hit with the larger document id. In this
+  // case, the hit from the lite index is chosen and returned.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/1,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+  // 2nd Advance(). Since the lite index iterator had the larger document id
+  // in the previous round, we advance it in this round. The hit from the
+  // lite index is still chosen and returned.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+  // 3rd Advance(). Since the lite index iterator had the larger document id
+  // in the previous round, we advance it again. The lite index has no more
+  // hits, so the hit from the main index is chosen and returned.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/1,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 4th Advance(). Advance main index.
+ ICING_ASSERT_OK(itr->Advance());
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+
+ // 5th Advance(). Reach the end.
+ ASSERT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(
+ itr->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/2,
+ /*num_leaf_advance_calls_main_index=*/2,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+}
+
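+// A sketch of how the per-storage call stats above might be consumed outside
+// of tests (field names mirror the matcher arguments; the decision rule is
+// illustrative, not an API contract):
+//   while (itr->Advance().ok()) { /* consume hits */ }
+//   auto stats = itr->GetCallStats();
+//   if (stats.num_leaf_advance_calls_lite_index >
+//       stats.num_leaf_advance_calls_main_index) {
+//     // Most work hit the lite index; Merge() may speed up later queries.
+//   }
+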
TEST_F(IndexTest, SingleHitSingleTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId2);
+
+ ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
+ /*new_last_added_document_id=*/2));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId2, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
+
+ // Mapping to a different docid will translate the hit
+ ICING_ASSERT_OK(index_->Optimize(
+ /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
+ /*new_last_added_document_id=*/1));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
+
+ // Mapping to kInvalidDocumentId will remove the hit.
+ ICING_ASSERT_OK(
+ index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
+ /*new_last_added_document_id=*/0));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
+}
+
+TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId2);
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
+ /*new_last_added_document_id=*/2));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId2, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
+
+ // Mapping to a different docid will translate the hit
+ ICING_ASSERT_OK(index_->Optimize(
+ /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
+ /*new_last_added_document_id=*/1));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
+
+ // Mapping to kInvalidDocumentId will remove the hit.
+ ICING_ASSERT_OK(
+ index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
+ /*new_last_added_document_id=*/0));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+  EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
}
TEST_F(IndexTest, SingleHitMultiTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId2);
+
+ ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
+ /*new_last_added_document_id=*/2));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
+
+ // Delete document id 1, and document id 2 is translated to 1.
+ ICING_ASSERT_OK(
+ index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
+ /*new_last_added_document_id=*/1));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
+
+ // Delete all the rest documents.
+ ICING_ASSERT_OK(index_->Optimize(
+ /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
+ /*new_last_added_document_id=*/kInvalidDocumentId));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
+}
+
+TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId2);
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
+ /*new_last_added_document_id=*/2));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
+
+ // Delete document id 1, and document id 2 is translated to 1.
+ ICING_ASSERT_OK(
+ index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
+ /*new_last_added_document_id=*/1));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
+
+ // Delete all the rest documents.
+ ICING_ASSERT_OK(index_->Optimize(
+ /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
+ /*new_last_added_document_id=*/kInvalidDocumentId));
+ EXPECT_THAT(
+ GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
}
TEST_F(IndexTest, NoHitMultiTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("baz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("baz", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
TEST_F(IndexTest, MultiHitMultiTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
}
TEST_F(IndexTest, MultiHitSectionRestrict) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, desired_section,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ SectionIdMask desired_section = 1U << kSectionId2;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, desired_section,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
TEST_F(IndexTest, SingleHitDedupeIndex) {
- // Act
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t size, index_->GetElementsSize());
+ EXPECT_THAT(size, Eq(0));
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
+ EXPECT_THAT(size, Gt(0));
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t new_size, index_->GetElementsSize());
+ EXPECT_THAT(new_size, Eq(size));
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, PrefixHit) {
- // Act
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+TEST_F(IndexTest, PrefixHitAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
TEST_F(IndexTest, MultiPrefixHit) {
- // Act
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(
GetHits(std::move(itr)),
ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId3})));
+}
+
+TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, PrefixHitDedupe) {
- // Act
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, PrefixToString) {
SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", id_mask, TermMatchType::PREFIX));
- EXPECT_THAT(itr->ToString(), Eq("0000000000001100:foo*"));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, id_mask,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
+ "000000000000000001100:foo* OR "
+ "00000000000000000000000000000000000000000000"
+ "00000000000000001100:foo*)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskAll,
- TermMatchType::PREFIX));
- EXPECT_THAT(itr->ToString(), Eq("1111111111111111:foo*"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(itr->ToString(), Eq("(1111111111111111111111111111111111111111111"
+ "111111111111111111111:foo* OR "
+ "11111111111111111111111111111111111111111111"
+ "11111111111111111111:foo*)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskNone,
- TermMatchType::PREFIX));
- EXPECT_THAT(itr->ToString(), Eq("0000000000000000:foo*"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskNone, TermMatchType::PREFIX));
+ EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
+ "000000000000000000000:foo* OR "
+ "00000000000000000000000000000000000000000000"
+ "00000000000000000000:foo*)"));
}
TEST_F(IndexTest, ExactToString) {
SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("foo", id_mask, TermMatchType::EXACT_ONLY));
- EXPECT_THAT(itr->ToString(), Eq("0000000000001100:foo"));
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, id_mask,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
+ "000000000000000001100:foo OR "
+ "00000000000000000000000000000000000000000000"
+ "00000000000000001100:foo)"));
ICING_ASSERT_OK_AND_ASSIGN(
- itr,
- index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
- EXPECT_THAT(itr->ToString(), Eq("1111111111111111:foo"));
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->ToString(), Eq("(1111111111111111111111111111111111111111111"
+ "111111111111111111111:foo OR "
+ "11111111111111111111111111111111111111111111"
+ "11111111111111111111:foo)"));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("foo", kSectionIdMaskNone,
- TermMatchType::EXACT_ONLY));
- EXPECT_THAT(itr->ToString(), Eq("0000000000000000:foo"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskNone, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->ToString(), Eq("(0000000000000000000000000000000000000000000"
+ "000000000000000000000:foo OR "
+ "00000000000000000000000000000000000000000000"
+ "00000000000000000000:foo)"));
}
TEST_F(IndexTest, NonAsciiTerms) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
- ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("こんにちは"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("あなた"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("こんに", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- ICING_ASSERT_OK_AND_ASSIGN(itr,
- index_->GetIterator("あなた", kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("あなた", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("こんにちは"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("あなた"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("こんに", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("あなた", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
@@ -406,24 +1224,35 @@ TEST_F(IndexTest, NonAsciiTerms) {
TEST_F(IndexTest, FullIndex) {
// Make a smaller index so that it's easier to fill up.
- Index::Options options(index_dir_, /*index_merge_size=*/1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
std::default_random_engine random;
- libtextclassifier3::Status status = libtextclassifier3::Status::OK;
- constexpr int kTokenSize = 5;
- DocumentId document_id = 0;
std::vector<std::string> query_terms;
+ std::string prefix = "prefix";
+ for (int i = 0; i < 2600; ++i) {
+ constexpr int kTokenSize = 5;
+ query_terms.push_back(prefix +
+ RandomString(kAlNumAlphabet, kTokenSize, &random));
+ }
+
+ DocumentId document_id = 0;
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
while (status.ok()) {
for (int i = 0; i < 100; ++i) {
Index::Editor edit =
- index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(document_id, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- std::string term = RandomString(kAlNumAlphabet, kTokenSize, &random);
- status = edit.AddHit(term.c_str());
- if (i % 50 == 0) {
- // Remember one out of every fifty terms to query for later.
- query_terms.push_back(std::move(term));
+ size_t idx = uniform(random);
+ status = edit.BufferTerm(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
}
+ status = edit.IndexAllBufferedTerms();
if (!status.ok()) {
break;
}
@@ -431,36 +1260,379 @@ TEST_F(IndexTest, FullIndex) {
++document_id;
}
- // Assert
// Adding more hits should fail.
Index::Editor edit =
- index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("bar"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("baz"),
+ std::string term = prefix + "foo";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "bar";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "baz";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- for (const std::string& term : query_terms) {
+  for (size_t i = 0; i < query_terms.size(); i += 25) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(term.c_str(), kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ index_->GetIterator(query_terms.at(i).c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
// Each query term should contain at least one hit - there may have been
// other hits for this term that were added.
EXPECT_THAT(itr->Advance(), IsOk());
}
- EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> last_itr,
+ index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(last_itr->Advance(), IsOk());
+ EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
+}
+
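+// Note: when the lite index is full, BufferTerm() can still succeed (terms
+// are buffered in memory) while IndexAllBufferedTerms() returns
+// RESOURCE_EXHAUSTED; the next test shows that Merge() drains the lite index
+// and makes room for new hits.
+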
+TEST_F(IndexTest, FullIndexMerge) {
+ // Make a smaller index so that it's easier to fill up.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ std::default_random_engine random;
+ std::vector<std::string> query_terms;
+ std::string prefix = "prefix";
+ for (int i = 0; i < 2600; ++i) {
+ constexpr int kTokenSize = 5;
+ query_terms.push_back(prefix +
+ RandomString(kAlNumAlphabet, kTokenSize, &random));
+ }
+
+ DocumentId document_id = 0;
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
+ while (status.ok()) {
+ for (int i = 0; i < 100; ++i) {
+ Index::Editor edit =
+ index_->Edit(document_id, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ size_t idx = uniform(random);
+ status = edit.BufferTerm(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
+ }
+ status = edit.IndexAllBufferedTerms();
+ if (!status.ok()) {
+ break;
+ }
+ }
+ ++document_id;
+ }
+ EXPECT_THAT(status,
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ // Adding more hits should fail.
+ Index::Editor edit =
+ index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ std::string term = prefix + "foo";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "bar";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "baz";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> last_itr,
+ index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(last_itr->Advance(), IsOk());
+ EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
+
+  // After merging with the main index, adding more hits should succeed.
+ ICING_ASSERT_OK(index_->Merge());
+  edit = index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX,
+                      /*namespace_id=*/0);
+  term = prefix + "foo";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "bar";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "baz";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator(prefix + "bar", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ // We know that "bar" should have at least one hit because we just added it!
+ EXPECT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ last_itr, index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(last_itr->Advance(), IsOk());
+ EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1));
+}
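+
+// A hedged sketch (not part of the test above) of the recovery pattern
+// FullIndexMerge exercises: when indexing returns RESOURCE_EXHAUSTED, the
+// caller merges the lite index into the main index and retries.
+// absl_ports::IsResourceExhausted() is assumed to exist for this sketch.
+//
+//   libtextclassifier3::Status s = edit.IndexAllBufferedTerms();
+//   if (absl_ports::IsResourceExhausted(s)) {
+//     ICING_RETURN_IF_ERROR(index->Merge());  // drain the lite index
+//     edit = index->Edit(doc_id, section_id, match_type, namespace_id);
+//     ICING_RETURN_IF_ERROR(edit.BufferTerm(term));  // re-buffer and retry
+//     ICING_RETURN_IF_ERROR(edit.IndexAllBufferedTerms());
+//   }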
+
+TEST_F(IndexTest, OptimizeShouldWorkForEmptyIndex) {
+ // Optimizing an empty index should succeed, but have no effect.
+ ICING_ASSERT_OK(
+ index_->Optimize(std::vector<DocumentId>(),
+ /*new_last_added_document_id=*/kInvalidDocumentId));
+ EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("", kSectionIdMaskAll, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("", kSectionIdMaskAll, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IndexTest, IndexShouldWorkAtSectionLimit) {
+ std::string prefix = "prefix";
+ std::default_random_engine random;
+ std::vector<std::string> query_terms;
+ // Add 4096 hits: the first 1024 get merged into the main index, leaving
+ // the remaining 3072 in the lite index.
+ for (int i = 0; i < 4096; ++i) {
+ if (i == 1024) {
+ ICING_ASSERT_OK(index_->Merge());
+ }
+ // Generate a unique term for document i.
+ query_terms.push_back(prefix + RandomString("abcdefg", 5, &random) +
+ std::to_string(i));
+ TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+ SectionId section_id = i % 64;
+ if (section_id == 2) {
+ // Make section 2 an exact section.
+ term_match_type = TermMatchType::EXACT_ONLY;
+ }
+ Index::Editor edit = index_->Edit(/*document_id=*/i, section_id,
+ term_match_type, /*namespace_id=*/0);
+ ICING_ASSERT_OK(edit.BufferTerm(query_terms.at(i).c_str()));
+ ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
+ }
+
+ std::vector<DocHitInfo> exp_prefix_hits;
+ for (int i = 0; i < 4096; ++i) {
+ if (i % 64 == 2) {
+ // Section 2 is an exact section, so we should not see any hits in
+ // prefix search.
+ continue;
+ }
+ exp_prefix_hits.push_back(DocHitInfo(i));
+ exp_prefix_hits.back().UpdateSection(/*section_id=*/i % 64);
+ }
+ std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
+
+ // Check prefix search.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
+
+ // Check exact search.
+ for (int i = 0; i < 4096; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ i, std::vector<SectionId>{(SectionId)(i % 64)})));
+ }
+}
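+
+// A note on the section math above, assuming the 64-section limit this test
+// targets: SectionId ranges over [0, 63], so i % 64 cycles through every
+// section, and kSectionIdMaskAll is the all-ones 64-bit mask, e.g.
+//
+//   SectionIdMask mask_for_one = UINT64_C(1) << section_id;
+//   SectionIdMask mask_for_all = ~UINT64_C(0);  // == kSectionIdMaskAll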
+
+// Skip this test on Android because it times out.
+#if !defined(__ANDROID__)
+TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
+ std::string prefix = "pre";
+ std::default_random_engine random;
+ const int max_lite_index_size = 1024 * 1024 / 8;
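+ // 1024 * 1024 / 8 = 131072, so a merge is forced every 131072 documents
+ // and the final partial batch stays in the lite index (asserted below).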
+ int lite_index_size = 0;
+ for (int i = 0; i <= kMaxDocumentId; ++i) {
+ if (i % max_lite_index_size == 0 && i != 0) {
+ ICING_ASSERT_OK(index_->Merge());
+ lite_index_size = 0;
+ }
+ std::string term;
+ TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+ SectionId section_id = i % 64;
+ if (section_id == 2) {
+ // Make section 2 an exact section.
+ term_match_type = TermMatchType::EXACT_ONLY;
+ term = std::to_string(i);
+ } else {
+ term = prefix + RandomString("abcd", 5, &random);
+ }
+ Index::Editor edit = index_->Edit(/*document_id=*/i, section_id,
+ term_match_type, /*namespace_id=*/0);
+ ICING_ASSERT_OK(edit.BufferTerm(term.c_str()));
+ ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
+ ++lite_index_size;
+ index_->set_last_added_document_id(i);
+ }
+ // Ensure that the lite index still contains some data to better test both
+ // indices.
+ ASSERT_THAT(lite_index_size, Eq(max_lite_index_size - 1));
+ EXPECT_EQ(index_->last_added_document_id(), kMaxDocumentId);
+
+ std::vector<DocHitInfo> exp_prefix_hits;
+ for (int i = 0; i <= kMaxDocumentId; ++i) {
+ if (i % 64 == 2) {
+ // Section 2 is an exact section, so we should not see any hits in
+ // prefix search.
+ continue;
+ }
+ exp_prefix_hits.push_back(DocHitInfo(i));
+ exp_prefix_hits.back().UpdateSection(/*section_id=*/i % 64);
+ }
+ std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
+
+ // Check prefix search.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
+
+ // Check exact search.
+ for (int i = 0; i <= kMaxDocumentId; ++i) {
+ if (i % 64 == 2) {
+ // Only section 2 is an exact section.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits,
+ GetHits(std::to_string(i), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ i, std::vector<SectionId>{(SectionId)(2)})));
+ }
+ }
+}
+#endif // if !defined(__ANDROID__)
+
+TEST_F(IndexTest, IndexOptimize) {
+ std::string prefix = "prefix";
+ std::default_random_engine random;
+ std::vector<std::string> query_terms;
+ // Add 1024 hits to main index, and 1024 hits to lite index.
+ for (int i = 0; i < 2048; ++i) {
+ if (i == 1024) {
+ ICING_ASSERT_OK(index_->Merge());
+ }
+ // Generate a unique term for document i.
+ query_terms.push_back(prefix + RandomString("abcdefg", 5, &random) +
+ std::to_string(i));
+ TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+ SectionId section_id = i % 64;
+ if (section_id == 2) {
+ // Make section 2 an exact section.
+ term_match_type = TermMatchType::EXACT_ONLY;
+ }
+ Index::Editor edit = index_->Edit(/*document_id=*/i, section_id,
+ term_match_type, /*namespace_id=*/0);
+ ICING_ASSERT_OK(edit.BufferTerm(query_terms.at(i).c_str()));
+ ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
+ index_->set_last_added_document_id(i);
+ }
+
+ // Delete every third document (ids where i % 3 == 0).
+ DocumentId document_id = 0;
+ DocumentId new_last_added_document_id = kInvalidDocumentId;
+ std::vector<DocumentId> document_id_old_to_new;
+ for (int i = 0; i < 2048; ++i) {
+ if (i % 3 == 0) {
+ document_id_old_to_new.push_back(kInvalidDocumentId);
+ } else {
+ new_last_added_document_id = document_id++;
+ document_id_old_to_new.push_back(new_last_added_document_id);
+ }
+ }
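+
+ // A worked example of the remapping built above (matching the i % 3 == 0
+ // deletion rule):
+ //   old 0 -> kInvalidDocumentId (deleted)
+ //   old 1 -> new 0
+ //   old 2 -> new 1
+ //   old 3 -> kInvalidDocumentId (deleted)
+ //   old 4 -> new 2
+ // Optimize() compacts every posting list according to this old-to-new map.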
+
+ std::vector<DocHitInfo> exp_prefix_hits;
+ for (int i = 0; i < 2048; ++i) {
+ if (document_id_old_to_new[i] == kInvalidDocumentId) {
+ continue;
+ }
+ if (i % 64 == 2) {
+ // Section 2 is an exact section, so we should not see any hits in
+ // prefix search.
+ continue;
+ }
+ exp_prefix_hits.push_back(DocHitInfo(document_id_old_to_new[i]));
+ exp_prefix_hits.back().UpdateSection(/*section_id=*/i % 64);
+ }
+ std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
+
+ // Check that Optimize remaps hits correctly.
+ ICING_ASSERT_OK(
+ index_->Optimize(document_id_old_to_new, new_last_added_document_id));
+ EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
+ // Check prefix search.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> hits,
+ GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
+ // Check exact search.
+ for (int i = 0; i < 2048; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
+ if (document_id_old_to_new[i] == kInvalidDocumentId) {
+ EXPECT_THAT(hits, IsEmpty());
+ } else {
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ document_id_old_to_new[i],
+ std::vector<SectionId>{(SectionId)(i % 64)})));
+ }
+ }
+
+ // Check that optimize does not block merge.
+ ICING_ASSERT_OK(index_->Merge());
+ EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
+ // Check prefix search.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits, GetHits(prefix, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::PREFIX));
+ EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
+ // Check exact search.
+ for (int i = 0; i < 2048; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ hits,
+ GetHits(query_terms[i], /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
+ if (document_id_old_to_new[i] == kInvalidDocumentId) {
+ EXPECT_THAT(hits, IsEmpty());
+ } else {
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ document_id_old_to_new[i],
+ std::vector<SectionId>{(SectionId)(i % 64)})));
+ }
+ }
}
TEST_F(IndexTest, IndexCreateIOFailure) {
// Create the index with mock filesystem. By default, Mock will return false,
// so the first attempted file operation will fail.
- NiceMock<IcingMockFilesystem> mock_filesystem;
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- EXPECT_THAT(Index::Create(options, &mock_filesystem),
+ NiceMock<IcingMockFilesystem> mock_icing_filesystem;
+ ON_CALL(mock_icing_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(Return(false));
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ EXPECT_THAT(Index::Create(options, &filesystem_, &mock_icing_filesystem),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -468,28 +1640,32 @@ TEST_F(IndexTest, IndexCreateCorruptionFailure) {
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Close the index.
index_.reset();
// Corrupt the index file.
std::string hit_buffer_filename = index_dir_ + "/idx/lite.hb";
- ScopedFd sfd(filesystem_.OpenForWrite(hit_buffer_filename.c_str()));
+ ScopedFd sfd(icing_filesystem_.OpenForWrite(hit_buffer_filename.c_str()));
ASSERT_THAT(sfd.is_valid(), IsTrue());
constexpr std::string_view kCorruptBytes = "ffffffffffffffffffffff";
// The first page of the hit_buffer is taken up by the header. Overwrite the
// first page of content.
- constexpr int kHitBufferStartOffset = 4096;
- ASSERT_THAT(filesystem_.PWrite(sfd.get(), kHitBufferStartOffset,
- kCorruptBytes.data(), kCorruptBytes.length()),
- IsTrue());
+ int hit_buffer_start_offset = GetBlockSize();
+ ASSERT_THAT(
+ icing_filesystem_.PWrite(sfd.get(), hit_buffer_start_offset,
+ kCorruptBytes.data(), kCorruptBytes.length()),
+ IsTrue());
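+ // E.g. with a 4096-byte block, GetBlockSize() returns 4096 and the corrupt
+ // bytes land at the start of the first content page, just past the header
+ // page (the exact value is platform-dependent, hence the helper).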
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- EXPECT_THAT(Index::Create(options, &filesystem_),
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
}
@@ -497,217 +1673,1070 @@ TEST_F(IndexTest, IndexPersistence) {
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->PersistToDisk(), IsOk());
// Close the index.
index_.reset();
// Recreate the index.
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// Check that the hits are present.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
-TEST_F(IndexTest, InvalidHitBufferSize) {
- Index::Options options(
- index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max());
- EXPECT_THAT(Index::Create(options, &filesystem_),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST_F(IndexTest, ComputeChecksumSameBetweenCalls) {
- // Add some content to the index.
- Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
- TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
-
- Crc32 checksum = index_->ComputeChecksum();
- // Calling it again shouldn't change the checksum
- EXPECT_THAT(index_->ComputeChecksum(), Eq(checksum));
-}
-
-TEST_F(IndexTest, ComputeChecksumSameAcrossInstances) {
- // Add some content to the index.
+TEST_F(IndexTest, IndexPersistenceAfterMerge) {
+ // Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
-
- Crc32 checksum = index_->ComputeChecksum();
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+ EXPECT_THAT(index_->PersistToDisk(), IsOk());
- // Recreate the index, checksum should still be the same across instances
+ // Close the index.
index_.reset();
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
- EXPECT_THAT(index_->ComputeChecksum(), Eq(checksum));
-}
-
-TEST_F(IndexTest, ComputeChecksumChangesOnModification) {
- // Add some content to the index.
- Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
- TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
-
- Crc32 checksum = index_->ComputeChecksum();
+ // Recreate the index.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
- // Modifying the index changes the checksum;
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ // Check that the hits are present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
- EXPECT_THAT(index_->ComputeChecksum(), Not(Eq(checksum)));
+TEST_F(IndexTest, InvalidHitBufferSize) {
+ Index::Options options(
+ index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max(),
+ /*lite_index_sort_at_indexing=*/true, /*lite_index_sort_size=*/1024 * 8);
+ EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
- /*num_to_return=*/0),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"foo", /*num_to_return=*/0, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"foo", /*num_to_return=*/-1, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(IsEmpty()));
+
+ ICING_ASSERT_OK(index_->Merge());
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
- /*num_to_return=*/-1),
- IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"foo", /*num_to_return=*/0, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"foo", /*num_to_return=*/-1, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(IsEmpty()));
}
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // "b" should only match "bar" but not "foo".
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"b", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
// "b" should only match "bar" but not "foo".
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0},
- /*num_to_return=*/10),
- IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"b", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));
}
TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fo"), IsOk());
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// We have 3 results but only 2 should be returned.
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
- /*num_to_return=*/2),
- IsOkAndHolds(SizeIs(2)));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/2, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(SizeIs(2)));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // We have 3 results but only 2 should be returned.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/2, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(SizeIs(2)));
}
-TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInOneNamespace) {
+TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) {
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
- EXPECT_THAT(edit1.AddHit("foo"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
- // namespace with id 0 has 2 results.
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
- /*num_to_return=*/10),
- IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
- EqualsTermMetadata("foo", 1))));
+ Index::Editor edit3 =
+ index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/2);
+ EXPECT_THAT(edit3.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
- // namespace with id 1 has 1 result.
+ // Should return "fo", "foo" and "fool" across all namespaces.
EXPECT_THAT(
- index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1},
- /*num_to_return=*/10),
- IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fool", 1))));
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
+ EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Should return "fo", "foo" and "fool" across all namespaces.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
+ EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 1))));
}
-TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInMultipleNamespaces) {
+TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) {
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit1.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
- /*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("foo"), IsOk());
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit2.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
- Index::Editor edit3 =
- index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
- /*namespace_id=*/2);
- EXPECT_THAT(edit3.AddHit("fool"), IsOk());
+ // 'foo' has 1 hit, 'fool' has 2 hits.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
+ EqualsTermMetadata("foo", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
- // Should return "foo" and "fool" which are in namespaces with ids 1 and 2.
EXPECT_THAT(
- index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1, 2},
- /*num_to_return=*/10),
- IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
- EqualsTermMetadata("fool", 1))));
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
+ EqualsTermMetadata("foo", 1))));
}
-TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) {
+TEST_F(IndexTest, FindTermByPrefixMultipleHitBatch) {
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ // Create multiple hit batches.
+ for (int i = 0; i < 4000; i++) {
+ Index::Editor edit = index_->Edit(i, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ }
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 4000))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 4000))));
+}
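+
+// The 4000 identical hits above span multiple posting-list batches, so this
+// verifies FindTermsByPrefix accumulates a term's document count across
+// batch boundaries (the batch size itself is an internal detail).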
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) {
+ // Push six hits for term-six, five for term-five, four for term-four,
+ // three for term-three, two for term-two and one for term-one into the
+ // lite index.
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit1.BufferTerm("term-one"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("term-two"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("term-three"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("term-four"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("term-six"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
- index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
- /*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("foo"), IsOk());
+ index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit2.BufferTerm("term-two"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("term-three"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("term-four"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("term-six"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
Index::Editor edit3 =
- index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
- /*namespace_id=*/2);
- EXPECT_THAT(edit3.AddHit("fool"), IsOk());
+ index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit3.BufferTerm("term-three"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("term-four"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("term-six"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
+
+ Index::Editor edit4 =
+ index_->Edit(kDocumentId4, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit4.BufferTerm("term-four"), IsOk());
+ EXPECT_THAT(edit4.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit4.BufferTerm("term-six"), IsOk());
+ EXPECT_THAT(edit4.IndexAllBufferedTerms(), IsOk());
- // Should return "fo", "foo" and "fool" across all namespaces.
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{},
- /*num_to_return=*/10),
- IsOkAndHolds(UnorderedElementsAre(
- EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
- EqualsTermMetadata("fool", 1))));
+ Index::Editor edit5 =
+ index_->Edit(kDocumentId5, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit5.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit5.BufferTerm("term-six"), IsOk());
+ EXPECT_THAT(edit5.IndexAllBufferedTerms(), IsOk());
+
+ Index::Editor edit6 =
+ index_->Edit(kDocumentId6, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit6.BufferTerm("term-six"), IsOk());
+ EXPECT_THAT(edit6.IndexAllBufferedTerms(), IsOk());
+
+ // Verify the order in the lite index is correct.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"t", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6),
+ EqualsTermMetadata("term-five", 5),
+ EqualsTermMetadata("term-four", 4),
+ EqualsTermMetadata("term-three", 3),
+ EqualsTermMetadata("term-two", 2),
+ EqualsTermMetadata("term-one", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"t", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6),
+ EqualsTermMetadata("term-five", 5),
+ EqualsTermMetadata("term-four", 4),
+ EqualsTermMetadata("term-three", 3),
+ EqualsTermMetadata("term-two", 2),
+ EqualsTermMetadata("term-one", 1))));
+
+ // Keep pushing terms to the lite index. We will add 2 documents to
+ // term-five, term-three and term-one. The output order should be
+ // 5-6-3-4-1-2.
+ Index::Editor edit7 =
+ index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit7.BufferTerm("term-one"), IsOk());
+ EXPECT_THAT(edit7.BufferTerm("term-three"), IsOk());
+ EXPECT_THAT(edit7.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit7.IndexAllBufferedTerms(), IsOk());
+
+ Index::Editor edit8 =
+ index_->Edit(kDocumentId8, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit8.BufferTerm("term-one"), IsOk());
+ EXPECT_THAT(edit8.BufferTerm("term-three"), IsOk());
+ EXPECT_THAT(edit8.BufferTerm("term-five"), IsOk());
+ EXPECT_THAT(edit8.IndexAllBufferedTerms(), IsOk());
+
+ // Verify the combined results of the lite index and main index are in the
+ // correct order.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"t", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(
+ EqualsTermMetadata("term-five", 7), EqualsTermMetadata("term-six", 6),
+ EqualsTermMetadata("term-three", 5),
+ EqualsTermMetadata("term-four", 4), EqualsTermMetadata("term-one", 3),
+ EqualsTermMetadata("term-two", 2))));
+
+ // Get the first three terms.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"t", /*num_to_return=*/3, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-five", 7),
+ EqualsTermMetadata("term-six", 6),
+ EqualsTermMetadata("term-three", 5))));
}
-TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) {
+TEST_F(IndexTest, FindTermByPrefix_InTermMatchTypePrefix_ShouldReturnInOrder) {
Index::Editor edit1 =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
+
+ Index::Editor edit2 =
+ index_->Edit(kDocumentId2, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
+
+ Index::Editor edit3 =
+ index_->Edit(kDocumentId3, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit3.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+ // Verify the order in the posting lists (pls) is correct:
+ // "fo" { {doc0, exact_hit}, {doc1, prefix_hit}, {doc2, prefix_hit} }
+ // "foo" { {doc1, exact_hit}, {doc2, prefix_hit} }
+ // "fool" { {doc2, exact_hit} }
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f",
+ /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
+ EqualsTermMetadata("foo", 2),
+ EqualsTermMetadata("fool", 1))));
+ // Find by exact only; all terms should rank equally.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
+ EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 1))));
+}
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnHitCountForMain) {
+ Index::Editor edit =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId4, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId5, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId6, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // 'foo' has 1 hit, 'fool' has 8 hits.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 8),
+ EqualsTermMetadata("foo", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 8))));
+}
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) {
+ Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("foo"), IsOk());
- EXPECT_THAT(edit1.AddHit("fool"), IsOk());
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
+ EqualsTermMetadata("foo", 1))));
+}
+
+TEST_F(IndexTest, FindTermRankComparison) {
+ Index::Editor edit =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+ EXPECT_THAT(edit.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
- index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(kDocumentId2, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit2.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
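+
+ // Worked counts for the expectations below: "fo" is buffered twice in doc0
+ // and once in doc2, so TERM_FREQUENCY scores it 3 while DOCUMENT_COUNT
+ // scores it 2 (two distinct documents); NONE reports 1 for every match.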
- // 'foo' has 1 hit, 'fool' has 2 hits.
EXPECT_THAT(
- index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
- /*num_to_return=*/10),
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
+ EqualsTermMetadata("foo", 2),
+ EqualsTermMetadata("fool", 1))));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 2),
+ EqualsTermMetadata("foo", 2),
+ EqualsTermMetadata("fool", 1))));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE, &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
+ EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY,
+ &impl),
+ IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
+ EqualsTermMetadata("foo", 2),
+ EqualsTermMetadata("fool", 1))));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 2),
+ EqualsTermMetadata("foo", 2),
+ EqualsTermMetadata("fool", 1))));
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE, &impl),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
+ EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 1))));
+}
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) {
+ Index::Editor edit =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ AlwaysTrueSuggestionResultCheckerImpl impl;
+
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(
+ /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &impl),
IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
- EqualsTermMetadata("fool", 2))));
+ EqualsTermMetadata("fool", 1))));
}
TEST_F(IndexTest, GetElementsSize) {
// Check empty index.
- EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(Eq(0)));
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t size, index_->GetElementsSize());
+ EXPECT_THAT(size, Eq(0));
// Add an element.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(Gt(0)));
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
+ EXPECT_THAT(size, Gt(0));
+
+ ASSERT_THAT(index_->Merge(), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
+ EXPECT_THAT(size, Gt(0));
+}
+
+TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, GetDebugInfo) {
+ // Add two documents to the lite index, merge them into the main index and
+ // then add another doc to the lite index.
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ index_->set_last_added_document_id(kDocumentId1);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ index_->set_last_added_document_id(kDocumentId2);
+ ASSERT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ IndexDebugInfoProto out0 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
+ ICING_LOG(DBG) << "main_index_info:\n" << out0.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out0.lite_index_info();
+ EXPECT_THAT(out0.main_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out0.lite_index_info(), Not(IsEmpty()));
+
+ IndexDebugInfoProto out1 = index_->GetDebugInfo(DebugInfoVerbosity::DETAILED);
+ ICING_LOG(DBG) << "main_index_info:\n" << out1.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out1.lite_index_info();
+ EXPECT_THAT(out1.main_index_info(),
+ SizeIs(Gt(out0.main_index_info().size())));
+ EXPECT_THAT(out1.lite_index_info(),
+ SizeIs(Gt(out0.lite_index_info().size())));
+
+ // Add one more doc to the lite index. Debug strings should change.
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ index_->set_last_added_document_id(kDocumentId3);
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ IndexDebugInfoProto out2 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
+ ICING_LOG(DBG) << "main_index_info:\n" << out2.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out2.lite_index_info();
+ EXPECT_THAT(out2.main_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out2.lite_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out2.main_index_info(), StrEq(out0.main_index_info()));
+ EXPECT_THAT(out2.lite_index_info(), StrNe(out0.lite_index_info()));
+
+ // Merge into the main index. Debug strings should change again.
+ ICING_ASSERT_OK(index_->Merge());
+
+ IndexDebugInfoProto out3 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
+ EXPECT_TRUE(out3.has_index_storage_info());
+ ICING_LOG(DBG) << "main_index_info:\n" << out3.main_index_info();
+ ICING_LOG(DBG) << "lite_index_info:\n" << out3.lite_index_info();
+ EXPECT_THAT(out3.main_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out3.lite_index_info(), Not(IsEmpty()));
+ EXPECT_THAT(out3.main_index_info(), StrNe(out2.main_index_info()));
+ EXPECT_THAT(out3.lite_index_info(), StrNe(out2.lite_index_info()));
+}
+
+TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
+ // Add two documents to the lite index, merge them into the main index and
+ // then add another doc to the lite index.
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId0, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // After this merge the index should have posting lists for
+ // "fool" {(doc0,sec3)},
+ // "foot" {(doc1,sec3)},
+ // "foo" {(doc1,sec3),(doc0,sec3),(doc0,sec2)}
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Add one more doc to the lite index.
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // After this merge the index should add a posting list for "far" and a
+ // backfill branch point for "f". In addition to the posting lists described
+ // above, which are unaffected, the new posting lists should be
+ // "far" {(doc2,sec2)},
+ // "f" {(doc1,sec3),(doc0,sec3)}
+ // Multiple pre-existing hits should be added to the new backfill branch
+ // point.
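+ // Note the expected "f" list omits (doc0,sec2): that hit was indexed
+ // EXACT_ONLY, and only prefix-enabled hits get copied into a backfill
+ // branch point.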
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId3})));
+}
+
+TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
+ // Add two documents to the lite index, merge them into the main index and
+ // then add another doc to the lite index.
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ // After this merge the index should have posting lists for
+ // "fool" {(doc0,sec2)},
+ // "foot" {(doc1,sec3)},
+ // "foo" {(doc1,sec3),(doc0,sec2)}
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ // Add one more doc to the lite index.
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // After this merge the index should add posting lists for "far" and "footer"
+ // and a backfill branch point for "f". The posting lists should now be
+ // "fool" {(doc0,sec2)},
+ // "foot" {(doc1,sec3)},
+ // "foo" {(doc2,sec3),(doc1,sec3),(doc0,sec2)}
+ // "footer" {(doc2,sec2)},
+ // "far" {(doc3,sec2)},
+ // "f" {(doc2,sec3),(doc1,sec3)}
+ // Multiple pre-existing hits should be added to the new backfill branch
+ // point.
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3})));
+}
+
+TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Add one document to the lite index
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ // Clipping to invalid should have no effect.
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ // Clipping to invalid should still have no effect even if hits are in main.
+ ICING_ASSERT_OK(index_->Merge());
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Clipping to invalid should still have no effect even if both indices have
+ // hits.
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Add one document to the lite index
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId0);
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ // Clipping to the last added document id should have no effect.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ // Clipping to the last added document id should still have no effect even
+ // if hits are in main.
+ ICING_ASSERT_OK(index_->Merge());
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId1);
+
+ // Clipping to the last added document id should still have no effect even
+ // if both indices have hits.
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0,
+ kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
+ // Add one document to the lite index and merge it into main.
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId0);
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Add another document to the lite index.
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId1);
+
+ EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
+
+ // Clipping to document 0 should toss out the lite index, but keep the main.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
+ // Add two documents to the lite index and merge them into main.
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId0);
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId1);
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Add another document to the lite index.
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId2);
+
+ EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
+
+ // Clipping to document 0 should toss out both indices.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IndexTest, IndexStorageInfoProto) {
+ // Add two documents to the lite index and merge them into main.
+ {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+ }
+
+ IndexStorageInfoProto storage_info = index_->GetStorageInfo();
+ EXPECT_THAT(storage_info.index_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_hit_buffer_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_block_size(), Ge(0));
+ // There should be 1 block for the header and 1 block for two posting lists.
+ EXPECT_THAT(storage_info.num_blocks(), Eq(2));
+ EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
}
} // namespace
diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc
new file mode 100644
index 0000000..63b09df
--- /dev/null
+++ b/icing/index/integer-section-indexing-handler.cc
@@ -0,0 +1,112 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/integer-section-indexing-handler.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<IntegerSectionIndexingHandler>>
+IntegerSectionIndexingHandler::Create(const Clock* clock,
+ NumericIndex<int64_t>* integer_index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(integer_index);
+
+ return std::unique_ptr<IntegerSectionIndexingHandler>(
+ new IntegerSectionIndexingHandler(clock, integer_index));
+}
+
+libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(
+ IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id));
+ }
+
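+ // Document ids must be strictly increasing: a duplicate or smaller id means
+ // the caller is re-handling an already indexed document.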
+ if (integer_index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= integer_index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, integer_index_.last_added_document_id()));
+ }
+ integer_index_.set_last_added_document_id(document_id);
+
+ libtextclassifier3::Status status;
+ // We have to add integer sections to the integer index in reverse order:
+ // sections are sorted by SectionId in ascending order, but BasicHits must be
+ // added in descending SectionId order (a posting list requirement).
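+ // For example, sections with ids {0, 1, 2} are visited as 2 -> 1 -> 0, so
+ // their hits reach the posting lists in descending SectionId order.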
+ for (auto riter = tokenized_document.integer_sections().rbegin();
+ riter != tokenized_document.integer_sections().rend(); ++riter) {
+ const Section<int64_t>& section = *riter;
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index_.Edit(
+ section.metadata.path, document_id, section.metadata.id);
+
+ for (int64_t key : section.content) {
+ status = editor->BufferKey(key);
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to buffer keys into integer index due to: "
+ << status.error_message();
+ break;
+ }
+ }
+ if (!status.ok()) {
+ break;
+ }
+
+ // Add all the seen keys to the integer index.
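+ // Note: std::move consumes the editor, so it must not be reused afterwards.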
+ status = std::move(*editor).IndexAllBufferedKeys();
+ if (!status.ok()) {
+ ICING_LOG(WARNING) << "Failed to add keys into integer index due to: "
+ << status.error_message();
+ break;
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_integer_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
+ return status;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/integer-section-indexing-handler.h b/icing/index/integer-section-indexing-handler.h
new file mode 100644
index 0000000..0a501aa
--- /dev/null
+++ b/icing/index/integer-section-indexing-handler.h
@@ -0,0 +1,71 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
+#define ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+class IntegerSectionIndexingHandler : public DataIndexingHandler {
+ public:
+ // Creates an IntegerSectionIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created IntegerSectionIndexingHandler instance.
+ //
+ // Returns:
+ // - An IntegerSectionIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<IntegerSectionIndexingHandler>>
+ Create(const Clock* clock, NumericIndex<int64_t>* integer_index);
+
+ ~IntegerSectionIndexingHandler() override = default;
+
+ // Handles the integer indexing process: adds hits into the integer index for
+ // all content in tokenized_document.integer_sections.
+ //
+ // Returns:
+ // - OK on success.
+ // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
+ // than or equal to the document_id of a previously indexed document in
+ // non-recovery mode.
+ // - Any NumericIndex<int64_t>::Editor errors.
+ libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+
+ private:
+ explicit IntegerSectionIndexingHandler(const Clock* clock,
+ NumericIndex<int64_t>* integer_index)
+ : DataIndexingHandler(clock), integer_index_(*integer_index) {}
+
+ NumericIndex<int64_t>& integer_index_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc
new file mode 100644
index 0000000..91cc06f
--- /dev/null
+++ b/icing/index/integer-section-indexing-handler_test.cc
@@ -0,0 +1,601 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/integer-section-indexing-handler.h"
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Indexable properties (sections) and section ids. Section ids are determined by
+// the lexicographical order of indexable property paths.
+// Schema type with indexable properties: FakeType
+// Section id = 0: "body"
+// Section id = 1: "timestamp"
+// Section id = 2: "title"
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyBody = "body";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+static constexpr std::string_view kPropertyTitle = "title";
+
+static constexpr SectionId kSectionIdTimestamp = 1;
+
+// Schema type with nested indexable properties: NestedType
+// Section id = 0: "name"
+// Section id = 1: "nested.body"
+// Section id = 2: "nested.timestamp"
+// Section id = 3: "nested.title"
+// Section id = 4: "price"
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyName = "name";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyPrice = "price";
+
+static constexpr SectionId kSectionIdNestedTimestamp = 2;
+static constexpr SectionId kSectionIdPrice = 4;
+
+class IntegerSectionIndexingHandlerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ integer_index_working_path_ = base_dir_ + "/integer_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ integer_index_,
+ IntegerIndex::Create(filesystem_, integer_index_working_path_,
+ /*num_data_threshold_for_bucket_split=*/65536,
+ /*pre_mapping_fbv=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTitle)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyPrice)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ integer_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string integer_index_working_path_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<NumericIndex<int64_t>> integer_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(IntegerSectionIndexingHandler::Create(/*clock=*/nullptr,
+ integer_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(IntegerSectionIndexingHandler::Create(&fake_clock_,
+ /*integer_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, HandleIntegerSection) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ // Handle document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id, std::vector<SectionId>{kSectionIdTimestamp})));
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, HandleNestedIntegerSection) {
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("icing", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle),
+ "nested title")
+ .AddStringProperty(std::string(kPropertyBody), "nested body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build())
+ .AddInt64Property(std::string(kPropertyPrice), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(nested_document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ // Handle nested_document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "nested.timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ "nested.timestamp", /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id, std::vector<SectionId>{kSectionIdNestedTimestamp})));
+
+ // Query "price".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyPrice, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id, std::vector<SectionId>{kSectionIdPrice})));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest, HandleShouldSkipEmptyIntegerSection) {
+ // Create a FakeType document without "timestamp".
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ // Handle document. Index data should remain unchanged since there are no
+ // indexable integers, but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest,
+ HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK(document_store_->Put(tokenized_document.document()));
+
+ static constexpr DocumentId kCurrentDocumentId = 3;
+ integer_index_->set_last_added_document_id(kCurrentDocumentId);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+
+ // Handling document with kInvalidDocumentId should cause a failure, and both
+ // index data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Recovery mode should get the same result.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest,
+ HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title")
+ .AddStringProperty(std::string(kPropertyBody), "body")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ integer_index_->set_last_added_document_id(document_id);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ integer_index_->set_last_added_document_id(document_id + 1);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id + 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id + 1));
+
+ // Query "timestamp". Should get empty result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IntegerSectionIndexingHandlerTest,
+ HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title one")
+ .AddStringProperty(std::string(kPropertyBody), "body one")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "title two")
+ .AddStringProperty(std::string(kPropertyBody), "body two")
+ .AddInt64Property(std::string(kPropertyTimestamp), 456)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerSectionIndexingHandler> handler,
+ IntegerSectionIndexingHandler::Create(&fake_clock_,
+ integer_index_.get()));
+
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document1, document_id1, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id1));
+
+ // Query "timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id1, std::vector<SectionId>{kSectionIdTimestamp})));
+
+ // Handle document with document_id == last_added_document_id in recovery
+ // mode. We should not get any error, but the handler should ignore the
+ // document, so both index data and last_added_document_id should remain
+ // unchanged.
+ integer_index_->set_last_added_document_id(document_id2);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id2));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document2, document_id2, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id2));
+
+ // Query "timestamp". Should not get hits for document2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id1, std::vector<SectionId>{kSectionIdTimestamp})));
+
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ integer_index_->set_last_added_document_id(document_id2 + 1);
+ ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id2 + 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document2, document_id2, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id2 + 1));
+
+ // Query "timestamp". Should not get hits for document2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ integer_index_->GetIterator(
+ kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_,
+ *schema_store_, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ document_id1, std::vector<SectionId>{kSectionIdTimestamp})));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
index e75ed87..1917fd0 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
@@ -32,7 +32,6 @@ libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() {
if (!IsDocumentIdValid(current_document_id_)) {
// Reached the end, set these to invalid values and return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -40,5 +39,12 @@ libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAllDocumentId::TrimRightMostNode() && {
+ // The all-document-id node is always trimmed away entirely.
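+ // Returning a null iterator_ signals to callers (e.g. DocHitInfoIteratorAnd)
+ // that the entire subtree was trimmed away.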
+ TrimmedNode node = {nullptr, /*term=*/"", /*term_start_index_=*/0,
+ /*unnormalized_term_length_=*/0};
+ return node;
+}
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
index 0fa74f5..60c5e0c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
@@ -35,10 +35,18 @@ class DocHitInfoIteratorAllDocumentId : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override { return 0; }
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumLeafAdvanceCalls() const override {
- return document_id_limit_ - current_document_id_;
+ void MapChildren(const ChildrenMapper& mapper) override {}
+
+ CallStats GetCallStats() const override {
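+ // Every Advance() counts as one no-index leaf advance; this iterator
+ // synthesizes document ids without touching any index storage.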
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/document_id_limit_ -
+ current_document_id_,
+ /*num_blocks_inspected_in=*/0);
}
std::string ToString() const override {
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
index 7366b97..379cb4d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
@@ -32,6 +32,7 @@ namespace {
using ::testing::ElementsAreArray;
using ::testing::Eq;
+using ::testing::IsNull;
using ::testing::Not;
TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
@@ -40,9 +41,8 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
// We'll always start with an invalid document_id, need to Advance before we
// get anything out of this.
- EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(kInvalidDocumentId));
- EXPECT_THAT(all_it.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(all_it.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
{
@@ -53,26 +53,25 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
}
}
-TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumBlocksInspected) {
+TEST(DocHitInfoIteratorAllDocumentIdTest, GetCallStats) {
DocHitInfoIteratorAllDocumentId all_it(100);
- EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0));
-
- // Number of iterations is chosen arbitrarily. Just meant to demonstrate that
- // no matter how many Advance calls are made, GetNumBlocksInspected should
- // always return 0.
- for (int i = 0; i < 5; ++i) {
- EXPECT_THAT(all_it.Advance(), IsOk());
- EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0));
- }
-}
-
-TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumLeafAdvanceCalls) {
- DocHitInfoIteratorAllDocumentId all_it(100);
- EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(0));
+ EXPECT_THAT(
+ all_it.GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
for (int i = 1; i <= 5; ++i) {
EXPECT_THAT(all_it.Advance(), IsOk());
- EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(i));
+ EXPECT_THAT(
+ all_it.GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/i, /*num_blocks_inspected=*/0));
}
}
@@ -86,12 +85,8 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) {
// Test one advance
DocHitInfoIteratorAllDocumentId all_it(5);
EXPECT_THAT(all_it.Advance(), IsOk());
- EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(5));
-
- // Advancing shouldn't affect the intersect section ids mask, since there's
- // no intersecting going on
- EXPECT_THAT(all_it.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(all_it.doc_hit_info(),
+ EqualsDocHitInfo(5, std::vector<SectionId>{}));
}
{
@@ -108,6 +103,16 @@ TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) {
}
}
+TEST(DocHitInfoIteratorAllDocumentIdTest, TrimAllDocumentIdIterator) {
+ DocHitInfoIteratorAllDocumentId all_it(100);
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(all_it).TrimRightMostNode());
+ // The whole iterator is trimmed.
+ EXPECT_THAT(trimmed_node.term_, testing::IsEmpty());
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(0));
+ EXPECT_THAT(trimmed_node.iterator_, IsNull());
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index f224583..249bd0e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -14,8 +14,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
-#include <stddef.h>
-
+#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
@@ -38,8 +37,6 @@ namespace {
// When combining ANDed iterators, n-ary operator has better performance when
// number of operands > 3 according to benchmark cl/243720660
-// TODO (samzheng): Tune this number when it's necessary, e.g. implementation
-// changes.
inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
// The minimum number of iterators needed to construct a And iterator. The And
@@ -58,11 +55,12 @@ std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
if (iterators.size() <= kBinaryAndIteratorPerformanceThreshold &&
iterators.size() >= kMinBinaryIterators) {
// Accumulate the iterators that need to be ANDed together.
- iterator = std::move(iterators.at(0));
- for (size_t i = 1; i < iterators.size(); ++i) {
+ iterator = std::move(iterators.at(iterators.size() - 1));
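+ // Fold right to left: the last two iterators form the innermost AND, and
+ // each earlier iterator becomes the short_it of a new enclosing AND.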
+ for (int i = iterators.size() - 2; i >= 0; --i) {
std::unique_ptr<DocHitInfoIterator> temp_iterator = std::move(iterator);
iterator = std::make_unique<DocHitInfoIteratorAnd>(
- std::move(temp_iterator), std::move(iterators[i]));
+ /*short_it=*/std::move(iterators[i]),
+ /*long_it=*/std::move(temp_iterator));
}
} else {
// If the vector is too small, the AndNary iterator can handle it and return
@@ -85,7 +83,6 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
// Didn't find anything for the first iterator, reset to invalid values and
// return.
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -107,18 +104,21 @@ libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
// Guaranteed that short_doc_id and long_doc_id match now
doc_hit_info_ = short_->doc_hit_info();
- doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info());
- hit_intersect_section_ids_mask_ = short_->hit_intersect_section_ids_mask() &
- long_->hit_intersect_section_ids_mask();
+ doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask());
return libtextclassifier3::Status::OK;
}
-int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
- return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorAnd::GetNumLeafAdvanceCalls() const {
- return short_->GetNumLeafAdvanceCalls() + long_->GetNumLeafAdvanceCalls();
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAnd::TrimRightMostNode() && {
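+ // The right-most node lives under long_. If trimming consumes long_
+ // entirely, short_ alone remains; otherwise re-wrap both in a new AND.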
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
+ std::move(*long_).TrimRightMostNode());
+ if (trimmed_long.iterator_ == nullptr) {
+ trimmed_long.iterator_ = std::move(short_);
+ } else {
+ trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
+ std::move(short_), std::move(trimmed_long.iterator_));
+ }
+ return trimmed_long;
}
std::string DocHitInfoIteratorAnd::ToString() const {
@@ -141,7 +141,6 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
// Didn't find anything for the first iterator, reset to invalid values and
// return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -164,6 +163,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
DocumentId unused;
ICING_ASSIGN_OR_RETURN(
unused, AdvanceTo(iterator.get(), potential_document_id));
+ (void)unused; // Silence unused warning.
}
if (iterator->doc_hit_info().document_id() == potential_document_id) {
@@ -184,31 +184,41 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
// Found a DocumentId which exists in all the iterators
doc_hit_info_ = iterators_.at(0)->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- iterators_.at(0)->hit_intersect_section_ids_mask();
for (size_t i = 1; i < iterators_.size(); i++) {
- doc_hit_info_.MergeSectionsFrom(iterators_.at(i)->doc_hit_info());
- hit_intersect_section_ids_mask_ &=
- iterators_.at(i)->hit_intersect_section_ids_mask();
+ doc_hit_info_.MergeSectionsFrom(
+ iterators_.at(i)->doc_hit_info().hit_section_ids_mask());
}
return libtextclassifier3::Status::OK;
}
-int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
- int32_t blockCount = 0;
- for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
- blockCount += iter->GetNumBlocksInspected();
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorAndNary::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(
+ TrimmedNode trimmed_right,
+ std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
+ if (trimmed_right.iterator_ == nullptr) {
+ if (iterators_.size() > 2) {
+ iterators_.pop_back();
+ trimmed_right.iterator_ =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
+ } else if (iterators_.size() == 2) {
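+ // Only one child remains once the trimmed child is dropped, so return it
+ // directly instead of wrapping it in a new n-ary AND.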
+ trimmed_right.iterator_ = std::move(iterators_.at(0));
+ }
+ } else {
+ iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
+ trimmed_right.iterator_ =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
}
- return blockCount;
+ return trimmed_right;
}
-int32_t DocHitInfoIteratorAndNary::GetNumLeafAdvanceCalls() const {
- int32_t leafCount = 0;
- for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
- leafCount += iter->GetNumLeafAdvanceCalls();
+DocHitInfoIterator::CallStats DocHitInfoIteratorAndNary::GetCallStats() const {
+ CallStats call_stats;
+ for (const auto& iter : iterators_) {
+ call_stats += iter->GetCallStats();
}
- return leafCount;
+ return call_stats;
}
std::string DocHitInfoIteratorAndNary::ToString() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index 4618fb9..8c52ac9 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -18,6 +18,7 @@
#include <cstdint>
#include <memory>
#include <string>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -40,12 +41,32 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator {
std::unique_ptr<DocHitInfoIterator> long_it);
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override {
+ return short_->GetCallStats() + long_->GetCallStats();
+ }
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper& mapper) override {
+ short_ = mapper(std::move(short_));
+ long_ = mapper(std::move(long_));
+ }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ short_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ long_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
private:
std::unique_ptr<DocHitInfoIterator> short_;
std::unique_ptr<DocHitInfoIterator> long_;
@@ -61,12 +82,31 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override;
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper& mapper) override {
+ for (size_t i = 0; i < iterators_.size(); ++i) {
+ iterators_[i] = mapper(std::move(iterators_[i]));
+ }
+ }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ for (size_t i = 0; i < iterators_.size(); ++i) {
+ iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+ }
+
private:
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators_;
};
diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
index 35574b7..f204ada 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
@@ -74,39 +74,33 @@ TEST(DocHitInfoIteratorAndTest, Initialize) {
std::make_unique<DocHitInfoIteratorDummy>());
// We start out with invalid values
- EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(and_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
-TEST(DocHitInfoIteratorAndTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorAndTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
+ second_iter->SetCallStats(second_iter_call_stats);
DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
- EXPECT_THAT(and_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorAndTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
-
- EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves));
+ EXPECT_THAT(and_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats));
}
TEST(DocHitInfoIteratorAndTest, AdvanceNoOverlap) {
@@ -171,29 +165,257 @@ TEST(DocHitInfoIteratorAndTest, AdvanceNestedIterators) {
EXPECT_THAT(GetDocumentIds(outer_iter.get()), ElementsAre(10, 6, 2));
}
+TEST(DocHitInfoIteratorAndTest, TrimAndIterator) {
+ std::vector<DocHitInfo> left_vector = {DocHitInfo(3), DocHitInfo(2)};
+ std::vector<DocHitInfo> right_vector = {DocHitInfo(1), DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(left_vector);
+ std::unique_ptr<DocHitInfoIterator> right_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(right_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iter),
+ std::move(right_iter));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(3, 2));
+}
+
+TEST(DocHitInfoIteratorAndTest, TrimAndIterator_TwoLayer) {
+ // Build an iterator tree like:
+ //
+ // AND
+ // / \
+ // first AND
+ // | / \
+ // {0, 1} second third
+ // | |
+ // {1} {0}
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> nested_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(second_iter),
+ std::move(third_iter));
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(first_iter),
+ std::move(nested_iter));
+
+ // The third_iter is trimmed.
+ // AND
+ // / \
+ // first second
+ // | |
+ // {0, 1} {1}
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(1));
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, TrimAndNaryIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(2), DocHitInfo(1),
+ DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(2), DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(2)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "term", 10));
+
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+
+ // The third iterator is trimmed.
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(2, 1));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, TrimAndNaryIterator_TwoLayer) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(3), DocHitInfo(2),
+ DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(2), DocHitInfo(1),
+ DocHitInfo(0)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(1), DocHitInfo(0)};
+ std::vector<DocHitInfo> fourth_vector = {DocHitInfo(0)};
+
+ // Build nested iterator
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+ std::unique_ptr<DocHitInfoIterator> fourth_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(fourth_vector, "term", 10);
+ std::unique_ptr<DocHitInfoIterator> nested_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(third_iter),
+ std::move(fourth_iter));
+
+ // Build outer iterator
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::move(nested_iter));
+ std::unique_ptr<DocHitInfoIterator> iter =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+
+ // The fourth iterator is trimmed.
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*iter).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()), ElementsAre(1, 0));
+}
+
TEST(DocHitInfoIteratorAndTest, SectionIdMask) {
// Arbitrary section ids for the documents in the DocHitInfoIterators.
// Created to test correct section_id_mask behavior.
SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01010111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
ICING_EXPECT_OK(and_iter.Advance());
EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
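A quick standalone check of the mask arithmetic this test pins down: bit i of a SectionIdMask marks a hit in section i, and the AND iterator's doc_hit_info carries the OR (union) of its children's masks. The uint64_t width here is an assumption for illustration:

    #include <cassert>
    #include <cstdint>

    using SectionIdMask = uint64_t;  // width is an assumption

    int main() {
      SectionIdMask mask1 = 0b01010101;  // hits in sections 0, 2, 4, 6
      SectionIdMask mask2 = 0b00000110;  // hits in sections 1, 2
      // The combined doc_hit_info reports the union of the children's
      // per-document section masks, as the test above asserts.
      assert((mask1 | mask2) == 0b01010111);
      return 0;
    }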
+TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) {
+ {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(4);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+ doc_hit_info1.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/3);
+ doc_hit_info1.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+
+ SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{0, 1}, {2, 2}, {4, 3}, {6, 4}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(4);
+ doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+ doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+ SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{1, 2}, {2, 6}};
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {doc_hit_info2};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
+
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter),
+ std::move(second_iter));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(and_iter.Advance());
+ EXPECT_THAT(and_iter.doc_hit_info().document_id(), Eq(4));
+
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("hi", expected_section_ids_tf_map1),
+ EqualsTermMatchInfo("hello", expected_section_ids_tf_map2)));
+
+ EXPECT_FALSE(and_iter.Advance().ok());
+ }
+ {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(4);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ SectionIdMask section_id_mask1 = 0b00000101; // hits in sections 0, 2
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{0, 1}, {2, 2}};
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {doc_hit_info1};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hi");
+ second_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter),
+ std::move(second_iter));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(and_iter.Advance());
+ EXPECT_THAT(and_iter.doc_hit_info().document_id(), Eq(4));
+
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map1)));
+
+ EXPECT_FALSE(and_iter.Advance().ok());
+ }
+}
+
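The PopulateMatchedTermsStats assertions above specify three things: the stats vector stays empty before the first successful Advance(), each child iterator then contributes its term with a section-id-to-term-frequency map, and a term shared by several children is reported once (the "hi" AND "hi" block expects a single entry). A rough stand-in model of that contract, with hypothetical names rather than the real icing API:

    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using SectionId = int;          // stand-in
    using TermFrequency = uint8_t;  // stand-in

    struct TermMatchInfoSketch {
      std::string term;
      std::unordered_map<SectionId, TermFrequency> section_tf;
    };

    // Each child contributes (term, per-section frequencies) for the
    // current document; a term already present is not added again.
    void Populate(std::vector<TermMatchInfoSketch>* stats,
                  const std::string& term,
                  const std::unordered_map<SectionId, TermFrequency>& tf) {
      for (const TermMatchInfoSketch& s : *stats) {
        if (s.term == term) return;  // de-duplicate by term
      }
      stats->push_back({term, tf});
    }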
+TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats_NoMatchingDocument) {
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(4);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(5);
+ doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+ doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {doc_hit_info2};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+ EXPECT_FALSE(and_iter.Advance().ok());
}
TEST(DocHitInfoIteratorAndNaryTest, Initialize) {
@@ -205,9 +427,8 @@ TEST(DocHitInfoIteratorAndNaryTest, Initialize) {
DocHitInfoIteratorAndNary and_iter(std::move(iterators));
// We start out with invalid values
- EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(),
- Eq(kSectionIdMaskNone));
+ EXPECT_THAT(and_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) {
@@ -220,22 +441,42 @@ TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(DocHitInfoIteratorAndNaryTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorAndNaryTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
-
- int third_iter_blocks = 13; // arbitrary value
+ second_iter->SetCallStats(second_iter_call_stats);
+
+ DocHitInfoIterator::CallStats third_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/1000,
+ /*num_leaf_advance_calls_main_index_in=*/2000,
+ /*num_leaf_advance_calls_integer_index_in=*/3000,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/200); // arbitrary value
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumBlocksInspected(third_iter_blocks);
-
- int fourth_iter_blocks = 1; // arbitrary value
+ third_iter->SetCallStats(third_iter_call_stats);
+
+ DocHitInfoIterator::CallStats fourth_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/200,
+ /*num_leaf_advance_calls_main_index_in=*/400,
+ /*num_leaf_advance_calls_integer_index_in=*/100,
+ /*num_leaf_advance_calls_no_index_in=*/20,
+ /*num_blocks_inspected_in=*/50); // arbitrary value
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumBlocksInspected(fourth_iter_blocks);
+ fourth_iter->SetCallStats(fourth_iter_call_stats);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -244,38 +485,9 @@ TEST(DocHitInfoIteratorAndNaryTest, GetNumBlocksInspected) {
iterators.push_back(std::move(fourth_iter));
DocHitInfoIteratorAndNary and_iter(std::move(iterators));
- EXPECT_THAT(and_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks +
- fourth_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorAndNaryTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- int third_iter_leaves = 13; // arbitrary value
- auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumLeafAdvanceCalls(third_iter_leaves);
-
- int fourth_iter_leaves = 13; // arbitrary value
- auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves);
-
- std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
- iterators.push_back(std::move(first_iter));
- iterators.push_back(std::move(second_iter));
- iterators.push_back(std::move(third_iter));
- iterators.push_back(std::move(fourth_iter));
- DocHitInfoIteratorAndNary and_iter(std::move(iterators));
-
- EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves +
- fourth_iter_leaves));
+ EXPECT_THAT(and_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats +
+ third_iter_call_stats + fourth_iter_call_stats));
}
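The single Eq(first + second + third + fourth) assertion presumes member-wise addition on CallStats. A hypothetical mirror of that shape, with fields following the constructor comments above (the operator itself is assumed, not copied from the real header):

    // Summed member-wise, so an n-ary iterator can report the total of
    // its children's stats.
    struct CallStatsSketch {
      int num_leaf_advance_calls_lite_index = 0;
      int num_leaf_advance_calls_main_index = 0;
      int num_leaf_advance_calls_integer_index = 0;
      int num_leaf_advance_calls_no_index = 0;
      int num_blocks_inspected = 0;

      CallStatsSketch operator+(const CallStatsSketch& o) const {
        return {num_leaf_advance_calls_lite_index +
                    o.num_leaf_advance_calls_lite_index,
                num_leaf_advance_calls_main_index +
                    o.num_leaf_advance_calls_main_index,
                num_leaf_advance_calls_integer_index +
                    o.num_leaf_advance_calls_integer_index,
                num_leaf_advance_calls_no_index +
                    o.num_leaf_advance_calls_no_index,
                num_blocks_inspected + o.num_blocks_inspected};
      }
    };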
TEST(DocHitInfoIteratorAndNaryTest, Advance) {
@@ -311,7 +523,6 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3
SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01101111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
@@ -320,16 +531,16 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4, section_id_mask4)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>(third_vector);
- third_iter->set_hit_intersect_section_ids_mask(section_id_mask3);
+ third_iter->set_hit_section_ids_mask(section_id_mask3);
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>(fourth_vector);
- fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4);
+ fourth_iter->set_hit_section_ids_mask(section_id_mask4);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -342,7 +553,81 @@ TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
ICING_EXPECT_OK(and_iter.Advance());
EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, PopulateMatchedTermsStats) {
+ // Arbitrary section ids/term frequencies for the documents in the
+ // DocHitInfoIterators.
+ // For term "hi", document 10 and 8
+ DocHitInfoTermFrequencyPair doc_hit_info1_hi = DocHitInfo(10);
+ doc_hit_info1_hi.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+ doc_hit_info1_hi.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1_hi = {{0, 1}, {2, 2}, {6, 4}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2_hi = DocHitInfo(8);
+ doc_hit_info2_hi.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+ doc_hit_info2_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+ // For term "hello", document 10 and 9
+ DocHitInfoTermFrequencyPair doc_hit_info1_hello = DocHitInfo(10);
+ doc_hit_info1_hello.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+ doc_hit_info1_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1_hello = {{0, 2}, {3, 3}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2_hello = DocHitInfo(9);
+ doc_hit_info2_hello.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/3);
+ doc_hit_info2_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/2);
+
+ // For term "ciao", document 10 and 9
+ DocHitInfoTermFrequencyPair doc_hit_info1_ciao = DocHitInfo(10);
+ doc_hit_info1_ciao.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+ doc_hit_info1_ciao.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/3);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1_ciao = {{0, 2}, {1, 3}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2_ciao = DocHitInfo(9);
+ doc_hit_info2_ciao.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+ doc_hit_info2_ciao.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/2);
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1_hi,
+ doc_hit_info2_hi};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {
+ doc_hit_info1_hello, doc_hit_info2_hello};
+ std::vector<DocHitInfoTermFrequencyPair> third_vector = {doc_hit_info1_ciao,
+ doc_hit_info2_ciao};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+ auto third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "ciao");
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::move(first_iter));
+ iterators.push_back(std::move(second_iter));
+ iterators.push_back(std::move(third_iter));
+
+ DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(and_iter.Advance());
+ EXPECT_THAT(and_iter.doc_hit_info().document_id(), Eq(10));
+
+ and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("hi", expected_section_ids_tf_map1_hi),
+ EqualsTermMatchInfo("hello", expected_section_ids_tf_map1_hello),
+ EqualsTermMatchInfo("ciao", expected_section_ids_tf_map1_ciao)));
+
+ EXPECT_FALSE(and_iter.Advance().ok());
}
} // namespace
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index 482a5ab..82d1ac7 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -31,7 +31,6 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -39,12 +38,12 @@ namespace lib {
DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Clock* clock, const Options& options)
+ const Options& options, int64_t current_time_ms)
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
options_(options),
- current_time_milliseconds_(clock->GetSystemTimeMilliseconds()) {
+ current_time_ms_(current_time_ms) {
// Precompute all the NamespaceIds
for (std::string_view name_space : options_.namespaces) {
auto namespace_id_or = document_store_.GetNamespaceId(name_space);
@@ -57,81 +56,68 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
// Precompute all the SchemaTypeIds
for (std::string_view schema_type : options_.schema_types) {
- auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ schema_type_ids_or =
+ schema_store_.GetSchemaTypeIdsWithChildren(schema_type);
// If we can't find the SchemaTypeId, just throw it away
- if (schema_type_id_or.ok()) {
- target_schema_type_ids_.emplace(schema_type_id_or.ValueOrDie());
+ if (schema_type_ids_or.ok()) {
+ const std::unordered_set<SchemaTypeId>* schema_type_ids =
+ schema_type_ids_or.ValueOrDie();
+ target_schema_type_ids_.insert(schema_type_ids->begin(),
+ schema_type_ids->end());
}
}
}
libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
- if (!delegate_->Advance().ok()) {
- // Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
-
- if (current_time_milliseconds_ < 0) {
- // This shouldn't happen, but we add a sanity check here for any unknown
- // errors.
- return absl_ports::InternalError(
- "Couldn't get current time. Try again in a bit");
- }
-
- if (options_.filter_deleted) {
- if (!document_store_.DoesDocumentExist(
- delegate_->doc_hit_info().document_id())) {
- // Document doesn't exist, keep searching
- return Advance();
+ while (delegate_->Advance().ok()) {
+ // Try to get the DocumentFilterData
+ auto document_filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(
+ delegate_->doc_hit_info().document_id(), current_time_ms_);
+ if (!document_filter_data_optional) {
+      // Didn't find the DocumentFilterData in the filter cache. This could be
+      // because the Document doesn't exist, the DocumentId isn't valid, or the
+      // filter cache is in some invalid state. This is bad, but not the query's
+      // responsibility to fix, so just skip this result for now.
+ continue;
}
- }
-
- // Try to get the DocumentFilterData
- auto document_filter_data_or = document_store_.GetDocumentFilterData(
- delegate_->doc_hit_info().document_id());
- if (!document_filter_data_or.ok()) {
- // Didn't find the DocumentFilterData in the filter cache. This could be
- // because the DocumentId isn't valid or the filter cache is in some invalid
- // state. This is bad, but not the query's responsibility to fix, so just
- // skip this result for now.
- return Advance();
- }
- // We should be guaranteed that this exists now.
- DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
+ // We should be guaranteed that this exists now.
+ DocumentFilterData data = document_filter_data_optional.value();
- if (!options_.namespaces.empty() &&
- target_namespace_ids_.count(data.namespace_id()) == 0) {
- // Doesn't match one of the specified namespaces. Keep searching
- return Advance();
- }
+ if (!options_.namespaces.empty() &&
+ target_namespace_ids_.count(data.namespace_id()) == 0) {
+ // Doesn't match one of the specified namespaces. Keep searching
+ continue;
+ }
- if (!options_.schema_types.empty() &&
- target_schema_type_ids_.count(data.schema_type_id()) == 0) {
- // Doesn't match one of the specified schema types. Keep searching
- return Advance();
- }
+ if (!options_.schema_types.empty() &&
+ target_schema_type_ids_.count(data.schema_type_id()) == 0) {
+ // Doesn't match one of the specified schema types. Keep searching
+ continue;
+ }
- if (current_time_milliseconds_ >= data.expiration_timestamp_ms()) {
- // Current time has exceeded the document's expiration time
- return Advance();
+ // Satisfied all our specified filters
+ doc_hit_info_ = delegate_->doc_hit_info();
+ return libtextclassifier3::Status::OK;
}
- // Satisfied all our specified filters
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ = delegate_->hit_intersect_section_ids_mask();
- return libtextclassifier3::Status::OK;
+ // Didn't find anything on the delegate iterator.
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
-int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
- return delegate_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorFilter::GetNumLeafAdvanceCalls() const {
- return delegate_->GetNumLeafAdvanceCalls();
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorFilter::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ if (trimmed_delegate.iterator_ != nullptr) {
+ trimmed_delegate.iterator_ = std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(trimmed_delegate.iterator_), &document_store_, &schema_store_,
+ options_, current_time_ms_);
+ }
+ return trimmed_delegate;
}
std::string DocHitInfoIteratorFilter::ToString() const {
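One structural point in the Advance() rewrite above: the old code called Advance() recursively for every rejected document, so a long run of deleted or expired hits grew the call stack, while the new code loops. A self-contained sketch of the pattern with stand-in types (the real method returns libtextclassifier3::Status and applies the liveness, namespace, and schema-type checks shown above):

    #include <optional>

    // Stand-ins for the real delegate/status machinery.
    struct Hit { int document_id; };
    struct Delegate {
      virtual ~Delegate() = default;
      virtual bool Advance() = 0;       // the real code returns a Status
      virtual Hit current() const = 0;  // the real code exposes doc_hit_info()
    };

    // Loop instead of self-recursion: every rejected hit costs one
    // iteration, not one stack frame.
    std::optional<Hit> NextPassing(Delegate& delegate,
                                   bool (*passes)(const Hit&)) {
      while (delegate.Advance()) {
        Hit h = delegate.current();
        if (!passes(h)) continue;  // deleted/expired/wrong namespace or type
        return h;                  // first hit satisfying every filter
      }
      return std::nullopt;  // exhausted, like the ResourceExhaustedError above
    }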
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index bf027e4..608665e 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -20,6 +20,7 @@
#include <string>
#include <string_view>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -27,7 +28,6 @@
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -37,10 +37,6 @@ namespace lib {
class DocHitInfoIteratorFilter : public DocHitInfoIterator {
public:
struct Options {
- // Filter out/don't return DocHitInfos that are associated with nonexistent
- // Documents.
- bool filter_deleted = true;
-
// List of namespaces that documents must have. An empty vector means that
// all namespaces are valid, and no documents will be filtered out.
//
@@ -61,16 +57,27 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
explicit DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Clock* clock, const Options& options);
+ const Options& options, int64_t current_time_ms);
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ void MapChildren(const ChildrenMapper& mapper) override {
+ delegate_ = mapper(std::move(delegate_));
+ }
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
std::string ToString() const override;
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
private:
std::unique_ptr<DocHitInfoIterator> delegate_;
const DocumentStore& document_store_;
@@ -78,7 +85,7 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
const Options options_;
std::unordered_set<NamespaceId> target_namespace_ids_;
std::unordered_set<SchemaTypeId> target_schema_type_ids_;
- const int64_t current_time_milliseconds_;
+ int64_t current_time_ms_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index e769013..0ed4d02 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -17,6 +17,7 @@
#include <limits>
#include <memory>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -25,9 +26,12 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -45,6 +49,18 @@ using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorDeletedFilterTest()
@@ -59,18 +75,22 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
test_document3_ =
DocumentBuilder().SetKey("icing", "email/3").SetSchema("email").Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -100,38 +120,11 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, EmptyOriginalIterator) {
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ schema_store_.get(), options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
-TEST_F(DocHitInfoIteratorDeletedFilterTest, TurnOffDeletedFilterOk) {
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(test_document1_));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- document_store_->Put(test_document2_));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- document_store_->Put(test_document3_));
-
- // Deletes test document 2
- ICING_ASSERT_OK(document_store_->Delete(test_document2_.namespace_(),
- test_document2_.uri()));
-
- std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
- DocHitInfo(document_id2),
- DocHitInfo(document_id3)};
- std::unique_ptr<DocHitInfoIterator> original_iterator =
- std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
-
- options_.filter_deleted = false;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
-
- EXPECT_THAT(GetDocumentIds(&filtered_iterator),
- ElementsAre(document_id1, document_id2, document_id3));
-}
-
TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(test_document1_));
@@ -140,8 +133,9 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
document_store_->Put(test_document3_));
// Deletes test document 2
- ICING_ASSERT_OK(document_store_->Delete(test_document2_.namespace_(),
- test_document2_.uri()));
+ ICING_ASSERT_OK(document_store_->Delete(
+ test_document2_.namespace_(), test_document2_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
DocHitInfo(document_id2),
@@ -151,7 +145,7 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id3));
@@ -177,7 +171,7 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NonExistingDocumentsAreFiltered) {
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id3));
@@ -190,7 +184,7 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, NegativeDocumentIdIsIgnored) {
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -204,7 +198,7 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, InvalidDocumentIdIsIgnored) {
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -221,7 +215,7 @@ TEST_F(DocHitInfoIteratorDeletedFilterTest, GreaterThanMaxDocumentIdIsIgnored) {
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -251,18 +245,22 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
.SetSchema("email")
.Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -295,7 +293,7 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, EmptyOriginalIterator) {
options_.namespaces = std::vector<std::string_view>{};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ schema_store_.get(), options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -313,7 +311,7 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest,
options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -330,7 +328,7 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, NoNamespacesReturnsAll) {
options_.namespaces = std::vector<std::string_view>{};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -354,7 +352,7 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest,
options_.namespaces = std::vector<std::string_view>{namespace1_};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2));
@@ -380,7 +378,7 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id4));
@@ -388,36 +386,58 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
protected:
+ static constexpr std::string_view kSchema1 = "email";
+ static constexpr std::string_view kSchema2 = "message";
+ static constexpr std::string_view kSchema3 = "person";
+ static constexpr std::string_view kSchema4 = "artist";
+ static constexpr std::string_view kSchema5 = "emailMessage";
+
DocHitInfoIteratorSchemaTypeFilterTest()
: test_dir_(GetTestTempDir() + "/icing") {}
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document1_schema1_ =
- DocumentBuilder().SetKey("namespace", "1").SetSchema(schema1_).Build();
- document2_schema2_ =
- DocumentBuilder().SetKey("namespace", "2").SetSchema(schema2_).Build();
- document3_schema3_ =
- DocumentBuilder().SetKey("namespace", "3").SetSchema(schema3_).Build();
- document4_schema1_ =
- DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
-
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(schema1_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema2_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema3_);
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ document1_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema(std::string(kSchema1))
+ .Build();
+ document2_schema2_ = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema(std::string(kSchema2))
+ .Build();
+ document3_schema3_ = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema(std::string(kSchema3))
+ .Build();
+ document4_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "4")
+ .SetSchema(std::string(kSchema1))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema1))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema2))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema3))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema4).AddParentType(
+ kSchema3))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(std::string(kSchema5))
+ .AddParentType(kSchema1)
+ .AddParentType(kSchema2))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -433,9 +453,6 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
FakeClock fake_clock_;
const Filesystem filesystem_;
const std::string test_dir_;
- const std::string schema1_ = "email";
- const std::string schema2_ = "message";
- const std::string schema3_ = "person";
DocumentProto document1_schema1_;
DocumentProto document2_schema2_;
DocumentProto document3_schema3_;
@@ -450,7 +467,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, EmptyOriginalIterator) {
options_.schema_types = std::vector<std::string_view>{};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ schema_store_.get(), options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -469,7 +486,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
std::vector<std::string_view>{"nonexistent_schema_type"};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -486,7 +503,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, NoSchemaTypesReturnsAll) {
options_.schema_types = std::vector<std::string_view>{};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -504,10 +521,10 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema1_};
+ options_.schema_types = std::vector<std::string_view>{kSchema1};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -527,15 +544,119 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
+ options_.schema_types = std::vector<std::string_view>{kSchema2, kSchema3};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id2, document_id3));
}
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypePolymorphismOk) {
+ // Add some irrelevant documents.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_schema2_));
+
+  // Create a person document and an artist document, where the artist should
+  // also be interpretable as a person by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("person")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId artist_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "artist")
+ .SetSchema("artist")
+ .Build()));
+
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id1), DocHitInfo(document_id2),
+ DocHitInfo(person_document_id), DocHitInfo(artist_document_id)};
+
+ // Filters for the "person" type should also include the "artist" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"person"};
+ DocHitInfoIteratorFilter filtered_iterator_1(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(person_document_id, artist_document_id));
+
+ // Filters for the "artist" type should not include the "person" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"artist"};
+ DocHitInfoIteratorFilter filtered_iterator_2(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(artist_document_id));
+}
+
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypeMultipleParentPolymorphismOk) {
+ // Create an email and a message document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("message")
+ .Build()));
+
+  // Create an emailMessage document, which should be interpretable as both an
+  // email and a message by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "emailMessage")
+ .SetSchema("emailMessage")
+ .Build()));
+
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(email_document_id), DocHitInfo(message_document_id),
+ DocHitInfo(email_message_document_id)};
+
+ // Filters for the "email" type should also include the "emailMessage" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"email"};
+ DocHitInfoIteratorFilter filtered_iterator_1(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(email_document_id, email_message_document_id));
+
+ // Filters for the "message" type should also include the "emailMessage" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"message"};
+ DocHitInfoIteratorFilter filtered_iterator_2(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(message_document_id, email_message_document_id));
+
+  // Filters for an irrelevant type should return nothing.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"person"};
+ DocHitInfoIteratorFilter filtered_iterator_3(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_3), IsEmpty());
+}
+
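The two polymorphism tests above rely on the constructor change shown earlier: each requested schema type is expanded through SchemaStore::GetSchemaTypeIdsWithChildren into the type plus its transitive children, so filtering on "person" admits "artist" while the reverse does not. A self-contained sketch of such an expansion (stand-in types, not the real API):

    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    using TypeId = int;  // stand-in for SchemaTypeId

    // Expand a type into {type} plus all transitive child types.
    std::unordered_set<TypeId> ExpandWithChildren(
        TypeId root,
        const std::unordered_map<TypeId, std::vector<TypeId>>& children) {
      std::unordered_set<TypeId> out = {root};
      std::vector<TypeId> stack = {root};
      while (!stack.empty()) {
        TypeId t = stack.back();
        stack.pop_back();
        auto it = children.find(t);
        if (it == children.end()) continue;
        for (TypeId c : it->second) {
          if (out.insert(c).second) stack.push_back(c);  // first visit only
        }
      }
      return out;
    }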
class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorExpirationFilterTest()
@@ -544,18 +665,22 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(email_schema_);
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(email_schema_))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -576,6 +701,16 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
};
TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
+ // Arbitrary value
+ fake_clock_.SetSystemTimeMilliseconds(100);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
@@ -584,23 +719,30 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
.SetTtlMs(0)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value
- fake_clock_.SetSystemTimeMilliseconds(100);
-
DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
+  // Arbitrary value, but must be less than the document's
+  // creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(50);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
@@ -609,92 +751,84 @@ TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value, but must be less than document's creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(50);
-
DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
+ // Current time is exactly the document's creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(150);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema(email_schema_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(50)
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Current time is exactly the document's creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(100);
-
DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
+ // Arbitrary value, but must be greater than the document's
+ // creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(151);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema(email_schema_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(50)
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value, but must be greater than the document's
- // creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(101);
-
DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options_, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
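Taken together, the four TTL tests pin down the expiration rule: a document is expired once current_time_ms >= creation_timestamp_ms + ttl_ms, and a ttl of 0 means the document never expires. As a tiny hypothetical predicate (the real check lives behind GetAliveDocumentFilterData):

    #include <cstdint>

    bool IsExpired(int64_t creation_timestamp_ms, int64_t ttl_ms,
                   int64_t current_time_ms) {
      if (ttl_ms == 0) return false;  // TtlZeroIsntFilteredOut
      return current_time_ms >= creation_timestamp_ms + ttl_ms;
    }
    // IsExpired(50, 100, 149) == false  (still within the ttl window)
    // IsExpired(50, 100, 150) == true   (EqualTtlFilteredOut)
    // IsExpired(50, 100, 151) == true   (PastTtlFilteredOut)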
-TEST_F(DocHitInfoIteratorExpirationFilterTest,
- InvalidTimeFiltersReturnsInternalError) {
- // Put something in the original iterator so we don't get a ResourceExhausted
- // error
- std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
- std::unique_ptr<DocHitInfoIterator> original_iterator =
- std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
-
- // -1 is an invalid timestamp
- fake_clock_.SetSystemTimeMilliseconds(-1);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
-
- EXPECT_THAT(filtered_iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
class DocHitInfoIteratorFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorFilterTest() : test_dir_(GetTestTempDir() + "/icing") {}
@@ -728,24 +862,27 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
document5_namespace1_schema1_ = DocumentBuilder()
.SetKey(namespace1_, "5")
.SetSchema(schema1_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(1)
.SetTtlMs(100)
.Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(schema1_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema2_);
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -773,26 +910,37 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
};
TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
+ // Filters out document5 since it's expired
+ fake_clock_.SetSystemTimeMilliseconds(199);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
- document_store_->Put(document1_namespace1_schema1_));
+ document_store->Put(document1_namespace1_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
- document_store_->Put(document2_namespace1_schema1_));
+ document_store->Put(document2_namespace1_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id3,
- document_store_->Put(document3_namespace2_schema1_));
+ document_store->Put(document3_namespace2_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id4,
- document_store_->Put(document4_namespace1_schema2_));
+ document_store->Put(document4_namespace1_schema2_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id5,
- document_store_->Put(document5_namespace1_schema1_));
+ document_store->Put(document5_namespace1_schema1_));
// Deletes document2, causing it to be filtered out
ICING_ASSERT_OK(
- document_store_->Delete(document2_namespace1_schema1_.namespace_(),
- document2_namespace1_schema1_.uri()));
+ document_store->Delete(document2_namespace1_schema1_.namespace_(),
+ document2_namespace1_schema1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
std::vector<DocHitInfo> doc_hit_infos = {
DocHitInfo(document_id1), DocHitInfo(document_id2),
@@ -810,13 +958,9 @@ TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
// Filters out document4 by schema type
options.schema_types = std::vector<std::string_view>{schema1_};
- // Filters out document5 since it's expired
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(199);
-
DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock, options);
+ std::move(original_iterator), document_store.get(), schema_store_.get(),
+ options, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -849,7 +993,7 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) {
DocHitInfoIteratorFilter::Options options;
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ options, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocHitInfos(&filtered_iterator),
ElementsAre(EqualsDocHitInfo(document_id1, section_ids1),
@@ -857,28 +1001,71 @@ TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) {
EqualsDocHitInfo(document_id3, section_ids3)));
}
-TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) {
+TEST_F(DocHitInfoIteratorFilterTest, GetCallStats) {
+ DocHitInfoIterator::CallStats original_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumBlocksInspected(5);
+ original_iterator->SetCallStats(original_call_stats);
DocHitInfoIteratorFilter::Options options;
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ options, fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5));
+ EXPECT_THAT(filtered_iterator.GetCallStats(), Eq(original_call_stats));
}
-TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) {
- auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumLeafAdvanceCalls(6);
+TEST_F(DocHitInfoIteratorFilterTest, TrimFilterIterator) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(document1_namespace1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(document2_namespace1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(document3_namespace2_schema1_));
+
+ // Build an iterator tree like:
+ // Filter
+ // |
+ // AND
+ // / \
+ // {1, 3} {2}
+ std::vector<DocHitInfo> left_vector = {DocHitInfo(document_id1),
+ DocHitInfo(document_id3)};
+ std::vector<DocHitInfo> right_vector = {DocHitInfo(document_id2)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(left_vector);
+ std::unique_ptr<DocHitInfoIterator> right_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(right_vector, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iter),
+ std::move(right_iter));
DocHitInfoIteratorFilter::Options options;
+ // Filters out document3 by namespace
+ options.namespaces = std::vector<std::string_view>{namespace1_};
DocHitInfoIteratorFilter filtered_iterator(
std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
-
- EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
+ options, fake_clock_.GetSystemTimeMilliseconds());
+
+ // The trimmed tree.
+ // Filter
+ // |
+ // {1, 3}
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(filtered_iterator).TrimRightMostNode());
+ EXPECT_THAT(trimmed_node.term_, Eq("term"));
+ EXPECT_THAT(trimmed_node.term_start_index_, Eq(10));
+ EXPECT_THAT(GetDocumentIds(trimmed_node.iterator_.get()),
+ ElementsAre(document_id1));
}
} // namespace
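The filter tests above now pin time explicitly via fake_clock_.SetSystemTimeMilliseconds(199) and hand the filter a plain timestamp (fake_clock_.GetSystemTimeMilliseconds()) instead of a clock pointer. Below is a minimal, self-contained sketch of the expiry rule those tests exercise; the helper and the numbers are illustrative assumptions (the authoritative check lives in DocumentStore), assuming a document with a positive TTL expires once creation_timestamp_ms + ttl_ms is no longer in the future:

#include <cstdint>
#include <iostream>

// Hypothetical helper, not icing API: a document with a positive TTL is
// expired once its creation time plus TTL is at or before the current time.
bool IsExpiredSketch(int64_t creation_timestamp_ms, int64_t ttl_ms,
                     int64_t current_time_ms) {
  return ttl_ms > 0 && creation_timestamp_ms + ttl_ms <= current_time_ms;
}

int main() {
  // With the clock pinned at 199ms, a document created at 50ms with a 100ms
  // TTL is expired; one with no TTL is kept. (Values are illustrative.)
  std::cout << IsExpiredSketch(/*creation_timestamp_ms=*/50, /*ttl_ms=*/100,
                               /*current_time_ms=*/199)
            << IsExpiredSketch(/*creation_timestamp_ms=*/50, /*ttl_ms=*/0,
                               /*current_time_ms=*/199)
            << "\n";  // prints "10"
  return 0;
}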
diff --git a/icing/index/iterator/doc-hit-info-iterator-none.h b/icing/index/iterator/doc-hit-info-iterator-none.h
new file mode 100644
index 0000000..c2853f1
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-none.h
@@ -0,0 +1,52 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+
+namespace icing {
+namespace lib {
+
+// Iterator that will return no results.
+class DocHitInfoIteratorNone : public DocHitInfoIterator {
+ public:
+ libtextclassifier3::Status Advance() override {
+ return absl_ports::ResourceExhaustedError(
+ "DocHitInfoIterator NONE has no hits.");
+ }
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ TrimmedNode node = {nullptr, /*term=*/"", /*term_start_index_=*/0,
+ /*unnormalized_term_length_=*/0};
+ return node;
+ }
+
+ void MapChildren(const ChildrenMapper& mapper) override {}
+
+ CallStats GetCallStats() const override { return CallStats(); }
+
+ std::string ToString() const override { return "(NONE)"; }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NONE_H_
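A tiny stand-in sketch (bool in place of libtextclassifier3::Status, not the real class) of how callers consume DocHitInfoIteratorNone: because Advance() always reports RESOURCE_EXHAUSTED, the standard drain loop never executes its body.

#include <iostream>

struct NoneIteratorSketch {
  // Stand-in for Advance(): "NONE has no hits", so it never succeeds.
  bool Advance() { return false; }
};

int main() {
  NoneIteratorSketch it;
  int hits = 0;
  while (it.Advance()) {
    ++hits;  // never reached
  }
  std::cout << "hits: " << hits << "\n";  // prints "hits: 0"
  return 0;
}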
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index e1ece5c..10a8292 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -15,13 +15,15 @@
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include <cstdint>
+#include <memory>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
-#include "icing/schema/section.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/store/document-id.h"
namespace icing {
@@ -35,40 +37,40 @@ DocHitInfoIteratorNot::DocHitInfoIteratorNot(
DocHitInfoIteratorAllDocumentId(document_id_limit)) {}
libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
- if (!all_document_id_iterator_.Advance().ok()) {
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
+ while (all_document_id_iterator_.Advance().ok()) {
+ if (all_document_id_iterator_.doc_hit_info().document_id() <
+ to_be_excluded_->doc_hit_info().document_id()) {
+ // Since DocumentIds are returned from DocHitInfoIterators in decreasing
+ // order, we have passed the last NOT result if we're smaller than its
+ // DocumentId. Advance the NOT result if so.
+ to_be_excluded_->Advance().IgnoreError();
+ }
- if (all_document_id_iterator_.doc_hit_info().document_id() <
- to_be_excluded_->doc_hit_info().document_id()) {
- // Since DocumentIds are returned from DocHitInfoIterators in decreasing
- // order, we have passed the last NOT result if we're smaller than its
- // DocumentId. Advance the NOT result if so.
- to_be_excluded_->Advance().IgnoreError();
- }
+ if (all_document_id_iterator_.doc_hit_info().document_id() ==
+ to_be_excluded_->doc_hit_info().document_id()) {
+ // This is a NOT result, skip and Advance to the next result.
+ continue;
+ }
- if (all_document_id_iterator_.doc_hit_info().document_id() ==
- to_be_excluded_->doc_hit_info().document_id()) {
- // This is a NOT result, skip and Advance to the next result.
- return Advance();
+ // No errors, we've found a valid result
+ doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
+ return libtextclassifier3::Status::OK;
}
- // No errors, we've found a valid result
- doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
-
- return libtextclassifier3::Status::OK;
+ // Didn't find a hit; return an error
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
-int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
- return to_be_excluded_->GetNumBlocksInspected() +
- all_document_id_iterator_.GetNumBlocksInspected();
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorNot::TrimRightMostNode() && {
+ // Don't generate a suggestion if the last operator is NOT.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is NOT operator.");
}
-int32_t DocHitInfoIteratorNot::GetNumLeafAdvanceCalls() const {
- return to_be_excluded_->GetNumLeafAdvanceCalls() +
- all_document_id_iterator_.GetNumLeafAdvanceCalls();
+void DocHitInfoIteratorNot::MapChildren(const ChildrenMapper& mapper) {
+ to_be_excluded_ = mapper(std::move(to_be_excluded_));
}
std::string DocHitInfoIteratorNot::ToString() const {
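The rewritten Advance() above replaces the old tail call (`return Advance();`) with a loop, so a long run of excluded DocumentIds no longer deepens the stack. A self-contained sketch of the same exclusion walk over plain ints, assuming both streams yield DocumentIds in strictly decreasing order:

#include <cstddef>
#include <iostream>
#include <vector>

std::vector<int> NotSketch(int document_id_limit,
                           const std::vector<int>& to_be_excluded) {
  std::vector<int> results;
  size_t excluded_pos = 0;
  for (int id = document_id_limit; id >= 0; --id) {
    // Move the excluded stream past ids larger than the candidate; this
    // mirrors advancing to_be_excluded_ when the candidate is smaller.
    while (excluded_pos < to_be_excluded.size() &&
           to_be_excluded[excluded_pos] > id) {
      ++excluded_pos;
    }
    if (excluded_pos < to_be_excluded.size() &&
        to_be_excluded[excluded_pos] == id) {
      continue;  // a NOT result: skip it
    }
    results.push_back(id);  // a valid hit
  }
  return results;
}

int main() {
  // Excluding {4, 2} with document_id_limit = 5 leaves 5, 3, 1, 0.
  for (int id : NotSketch(5, {4, 2})) {
    std::cout << id << " ";
  }
  std::cout << "\n";
  return 0;
}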
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.h b/icing/index/iterator/doc-hit-info-iterator-not.h
index 58e909d..11575fb 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.h
+++ b/icing/index/iterator/doc-hit-info-iterator-not.h
@@ -30,14 +30,12 @@ namespace lib {
// Iterator that will return all documents that are *not* specified by the
// to_be_excluded_iterator.
//
-// NOTE: The hit_intersect_section_ids_mask is meaningless for this iterator.
+// NOTE: doc_hit_info_.hit_section_ids_mask() is meaningless for this iterator.
// When this iterator produces a result, it's because the Document was not
// present in the to_be_excluded_iterator. There is no concept of the Document
// having been chosen because its term was in a specific section. Since we
// don't know anything about the sections for the Document, the
-// hit_intersect_section_ids_mask is always kSectionIdMaskNone. Correspondingly,
-// this means that the doc_hit_info.hit_section_ids_mask will also always be
-// kSectionIdMaskNone.
+// doc_hit_info.hit_section_ids_mask() is always kSectionIdMaskNone.
class DocHitInfoIteratorNot : public DocHitInfoIterator {
public:
// to_be_excluded_iterator: The results of this iterator will be excluded
@@ -50,9 +48,17 @@ class DocHitInfoIteratorNot : public DocHitInfoIterator {
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
+ // The NOT operator is not supposed to be trimmed.
+ // We shouldn't generate a suggestion for the last term if the last term
+ // belongs to a NOT operator.
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ void MapChildren(const ChildrenMapper& mapper) override;
+
+ CallStats GetCallStats() const override {
+ return to_be_excluded_->GetCallStats() +
+ all_document_id_iterator_.GetCallStats();
+ }
std::string ToString() const override;
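MapChildren() lets a later pass rewrite an iterator tree in place: each composite node hands every child to the mapper and keeps whatever comes back, exactly the shape of `to_be_excluded_ = mapper(std::move(to_be_excluded_))` above. A minimal sketch with stand-in node types (the real ChildrenMapper operates on std::unique_ptr<DocHitInfoIterator>):

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <utility>

struct Node {
  virtual ~Node() = default;
  virtual std::string ToString() const = 0;
  using Mapper = std::function<std::unique_ptr<Node>(std::unique_ptr<Node>)>;
  virtual void MapChildren(const Mapper& mapper) = 0;
};

struct Leaf : Node {
  std::string ToString() const override { return "leaf"; }
  void MapChildren(const Mapper&) override {}  // leaves have no children
};

struct NotNode : Node {
  std::unique_ptr<Node> child = std::make_unique<Leaf>();
  std::string ToString() const override {
    return "(NOT " + child->ToString() + ")";
  }
  void MapChildren(const Mapper& mapper) override {
    child = mapper(std::move(child));  // same pattern as the NOT iterator
  }
};

struct Wrapper : Node {
  std::unique_ptr<Node> inner;
  explicit Wrapper(std::unique_ptr<Node> in) : inner(std::move(in)) {}
  std::string ToString() const override {
    return "[wrapped " + inner->ToString() + "]";
  }
  void MapChildren(const Mapper& mapper) override {
    inner = mapper(std::move(inner));
  }
};

int main() {
  NotNode root;
  // Wrap every direct child of the NOT node.
  root.MapChildren([](std::unique_ptr<Node> child) -> std::unique_ptr<Node> {
    return std::make_unique<Wrapper>(std::move(child));
  });
  std::cout << root.ToString() << "\n";  // prints "(NOT [wrapped leaf])"
  return 0;
}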
diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
index 5d0e4ac..a8c835f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
@@ -102,40 +102,39 @@ TEST(DocHitInfoIteratorNotTest, AllDocumentIdOverlapOk) {
EXPECT_THAT(GetDocumentIds(&not_iterator), IsEmpty());
}
-TEST(DocHitInfoIteratorNotTest, GetNumBlocksInspected) {
- int to_be_excluded_iterator_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorNotTest, GetCallStats) {
+ DocHitInfoIterator::CallStats to_be_excluded_iterator_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto to_be_excluded_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- to_be_excluded_iterator->SetNumBlocksInspected(
- to_be_excluded_iterator_blocks);
-
- DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
- /*document_id_limit=*/5);
-
- // The AllDocumentId iterator doesn't count any blocks as being inspected
- // since it's just decrementing 1 from the document_id_limit.
- EXPECT_THAT(not_iterator.GetNumBlocksInspected(),
- Eq(to_be_excluded_iterator_blocks));
-}
-
-TEST(DocHitInfoIteratorNotTest, GetNumLeafAdvanceCalls) {
- int to_be_excluded_iterator_leaves = 4; // arbitrary value
- auto to_be_excluded_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- to_be_excluded_iterator->SetNumLeafAdvanceCalls(
- to_be_excluded_iterator_leaves);
+ to_be_excluded_iterator->SetCallStats(to_be_excluded_iterator_call_stats);
int all_document_id_limit = 5;
// Since we iterate from [limit, 0] inclusive, add 1 for the 0th advance call
int all_leaf_advance_calls = all_document_id_limit + 1;
DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
- all_document_id_limit);
+ /*document_id_limit=*/5);
while (not_iterator.Advance().ok()) {
// Advance through the whole not iterator
}
- // The AllDocumentId iterator counts each DocumentId as a leaf advance call
- EXPECT_THAT(not_iterator.GetNumLeafAdvanceCalls(),
- Eq(to_be_excluded_iterator_leaves + all_leaf_advance_calls));
+ // The AllDocumentId iterator doesn't count lite/main/integer index
+ // advances or blocks inspected, since it's just decrementing 1 from the
+ // document_id_limit; its advances count only as no-index calls.
+ EXPECT_THAT(
+ not_iterator.GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ to_be_excluded_iterator_call_stats.num_leaf_advance_calls_lite_index,
+ to_be_excluded_iterator_call_stats.num_leaf_advance_calls_main_index,
+ to_be_excluded_iterator_call_stats
+ .num_leaf_advance_calls_integer_index,
+ to_be_excluded_iterator_call_stats.num_leaf_advance_calls_no_index +
+ all_leaf_advance_calls,
+ to_be_excluded_iterator_call_stats.num_blocks_inspected));
}
TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) {
@@ -155,6 +154,17 @@ TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) {
DocHitInfo(0, kSectionIdMaskNone)));
}
+TEST(DocHitInfoIteratorNotTest, TrimNotIterator) {
+ std::vector<DocHitInfo> exclude_doc_hit_infos = {DocHitInfo(0)};
+ std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+ DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+ /*document_id_limit=*/5);
+ EXPECT_THAT(std::move(not_iterator).TrimRightMostNode(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
} // namespace
} // namespace lib
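A minimal sketch (not icing's actual CallStats, whose definition lives in doc-hit-info-iterator.h) of the aggregation the GetCallStats test above asserts: field-wise addition, with the AllDocumentId iterator contributing only no-index leaf advance calls.

#include <cstdint>
#include <iostream>

struct CallStatsSketch {
  int32_t num_leaf_advance_calls_lite_index;
  int32_t num_leaf_advance_calls_main_index;
  int32_t num_leaf_advance_calls_integer_index;
  int32_t num_leaf_advance_calls_no_index;
  int32_t num_blocks_inspected;

  CallStatsSketch& operator+=(const CallStatsSketch& other) {
    num_leaf_advance_calls_lite_index +=
        other.num_leaf_advance_calls_lite_index;
    num_leaf_advance_calls_main_index +=
        other.num_leaf_advance_calls_main_index;
    num_leaf_advance_calls_integer_index +=
        other.num_leaf_advance_calls_integer_index;
    num_leaf_advance_calls_no_index += other.num_leaf_advance_calls_no_index;
    num_blocks_inspected += other.num_blocks_inspected;
    return *this;
  }
};

int main() {
  CallStatsSketch excluded = {2, 5, 3, 1, 4};  // the dummy iterator's stats
  // The AllDocumentId iterator makes one no-index leaf advance per id in
  // [document_id_limit, 0]; with document_id_limit = 5 that is 6 calls.
  CallStatsSketch all_document_id = {0, 0, 0, 6, 0};
  excluded += all_document_id;
  std::cout << excluded.num_leaf_advance_calls_no_index << " "  // prints 7
            << excluded.num_blocks_inspected << "\n";           // prints 4
  return 0;
}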
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
index 9d18753..6251365 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -20,7 +20,9 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/store/document-id.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -29,8 +31,6 @@ namespace {
// When combining Or iterators, n-ary operator has better performance when
// number of operands > 2 according to benchmark cl/243321264
-// TODO (samzheng): Tune this number when it's necessary, e.g. implementation
-// changes.
constexpr int kBinaryOrIteratorPerformanceThreshold = 2;
} // namespace
@@ -59,6 +59,26 @@ DocHitInfoIteratorOr::DocHitInfoIteratorOr(
std::unique_ptr<DocHitInfoIterator> right_it)
: left_(std::move(left_it)), right_(std::move(right_it)) {}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorOr::TrimRightMostNode() && {
+ // Trim the whole OR iterator. Only keep the prefix of the right iterator.
+ //
+ // The OR operator has higher priority, so it is not possible for the
+ // right-most child of a nested iterator to hold an unfinished prefix that
+ // we need to search suggestions for.
+ //
+ // e.g. `foo OR (bar baz)` is not valid for search suggestion since there
+ // is no unfinished last term to be filled.
+ //
+ // If we need to trim an OR iterator for search suggestion, the right child
+ // must be the last term. We don't need left-side information to generate
+ // a suggestion for the right side.
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
+ std::move(*right_).TrimRightMostNode());
+ trimmed_right.iterator_ = nullptr;
+ return trimmed_right;
+}
+
libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
// Cache the document_id of the left iterator for comparison to the right.
DocumentId orig_left_document_id = left_document_id_;
@@ -94,7 +114,6 @@ libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
right_document_id_ == kInvalidDocumentId) {
// Reached the end, set these to invalid values and return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -110,27 +129,19 @@ libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
} else {
chosen = left_.get();
}
+ current_ = chosen;
doc_hit_info_ = chosen->doc_hit_info();
- hit_intersect_section_ids_mask_ = chosen->hit_intersect_section_ids_mask();
// If equal, combine.
if (left_document_id_ == right_document_id_) {
- doc_hit_info_.MergeSectionsFrom(right_->doc_hit_info());
- hit_intersect_section_ids_mask_ &= right_->hit_intersect_section_ids_mask();
+ doc_hit_info_.MergeSectionsFrom(
+ right_->doc_hit_info().hit_section_ids_mask());
}
return libtextclassifier3::Status::OK;
}
-int32_t DocHitInfoIteratorOr::GetNumBlocksInspected() const {
- return left_->GetNumBlocksInspected() + right_->GetNumBlocksInspected();
-}
-
-int32_t DocHitInfoIteratorOr::GetNumLeafAdvanceCalls() const {
- return left_->GetNumLeafAdvanceCalls() + right_->GetNumLeafAdvanceCalls();
-}
-
std::string DocHitInfoIteratorOr::ToString() const {
return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(),
")");
@@ -140,7 +151,28 @@ DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
: iterators_(std::move(iterators)) {}
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorOrNary::TrimRightMostNode() && {
+ // Trim the whole OR iterator.
+ //
+ // The OR operator has higher priority, so it is not possible for the
+ // right-most child of a nested iterator to hold an unfinished prefix that
+ // we need to search suggestions for.
+ //
+ // e.g. `foo OR (bar baz)` is not valid for search suggestion since there
+ // is no unfinished last term to be filled.
+ //
+ // If we need to trim an OR iterator for search suggestion, the right-most
+ // child must be the last term. We don't need left-side information to
+ // generate a suggestion for the right side.
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
+ std::move(*iterators_.back()).TrimRightMostNode());
+ trimmed_right.iterator_ = nullptr;
+ return trimmed_right;
+}
+
libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
+ current_iterators_.clear();
if (iterators_.size() < 2) {
return absl_ports::InvalidArgumentError(
"Not enough iterators to OR together");
@@ -150,7 +182,6 @@ libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
// 0 is the smallest (last) DocumentId, can't advance further. Reset to
// invalid values and return directly
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
@@ -180,43 +211,31 @@ libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
// None of the iterators had a next document_id, reset to invalid values and
// return
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
// Found the next hit DocumentId, now calculate the section info.
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
for (const auto& iterator : iterators_) {
if (iterator->doc_hit_info().document_id() == next_document_id) {
+ current_iterators_.push_back(iterator.get());
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
doc_hit_info_ = iterator->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- iterator->hit_intersect_section_ids_mask();
} else {
- doc_hit_info_.MergeSectionsFrom(iterator->doc_hit_info());
- hit_intersect_section_ids_mask_ &=
- iterator->hit_intersect_section_ids_mask();
+ doc_hit_info_.MergeSectionsFrom(
+ iterator->doc_hit_info().hit_section_ids_mask());
}
}
}
return libtextclassifier3::Status::OK;
}
-int32_t DocHitInfoIteratorOrNary::GetNumBlocksInspected() const {
- int32_t blockCount = 0;
- for (const auto& iter : iterators_) {
- blockCount += iter->GetNumBlocksInspected();
- }
- return blockCount;
-}
-
-int32_t DocHitInfoIteratorOrNary::GetNumLeafAdvanceCalls() const {
- int32_t leafCount = 0;
+DocHitInfoIterator::CallStats DocHitInfoIteratorOrNary::GetCallStats() const {
+ CallStats call_stats;
for (const auto& iter : iterators_) {
- leafCount += iter->GetNumLeafAdvanceCalls();
+ call_stats += iter->GetCallStats();
}
- return leafCount;
+ return call_stats;
}
std::string DocHitInfoIteratorOrNary::ToString() const {
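A tiny sketch of the section-mask bookkeeping in the rewritten Advance(): when both children land on the same DocumentId, the merged hit_section_ids_mask is the bitwise OR of the children's masks (the AND-style hit_intersect_section_ids_mask_ has been dropped). The masks below come from the SectionIdMask test in this change.

#include <cstdint>
#include <iostream>

int main() {
  uint64_t left_mask = 0b01010101;   // hits in sections 0, 2, 4, 6
  uint64_t right_mask = 0b00000110;  // hits in sections 1, 2
  uint64_t merged = left_mask | right_mask;
  std::cout << (merged == 0b01010111) << "\n";  // prints 1
  return 0;
}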
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 4128e0f..8c0427b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -16,7 +16,9 @@
#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_OR_H_
#include <cstdint>
+#include <memory>
#include <string>
+#include <utility>
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -34,17 +36,44 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator {
explicit DocHitInfoIteratorOr(std::unique_ptr<DocHitInfoIterator> left_it,
std::unique_ptr<DocHitInfoIterator> right_it);
- libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
+ libtextclassifier3::Status Advance() override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override {
+ return left_->GetCallStats() + right_->GetCallStats();
+ }
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper &mapper) override {
+ left_ = mapper(std::move(left_));
+ right_ = mapper(std::move(right_));
+ }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ current_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ // If equal, then current_ == left_. Combine with results from right_.
+ if (left_document_id_ == right_document_id_) {
+ right_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+ }
+
private:
std::unique_ptr<DocHitInfoIterator> left_;
std::unique_ptr<DocHitInfoIterator> right_;
+ // Pointer to the chosen iterator that points to the current doc_hit_info_.
+ // If both left_ and right_ point to the same docid, then current_ == left_.
+ // current_ does not own the iterator it points to.
+ DocHitInfoIterator *current_;
DocumentId left_document_id_ = kMaxDocumentId;
DocumentId right_document_id_ = kMaxDocumentId;
};
@@ -57,16 +86,38 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator {
explicit DocHitInfoIteratorOrNary(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
- libtextclassifier3::Status Advance() override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override;
+ libtextclassifier3::Status Advance() override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override;
std::string ToString() const override;
+ void MapChildren(const ChildrenMapper &mapper) override {
+ for (int i = 0; i < iterators_.size(); ++i) {
+ iterators_[i] = mapper(std::move(iterators_[i]));
+ }
+ }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ for (size_t i = 0; i < current_iterators_.size(); i++) {
+ current_iterators_.at(i)->PopulateMatchedTermsStats(
+ matched_terms_stats, filtering_section_mask);
+ }
+ }
+
private:
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators_;
+ // Pointers to the iterators that point to the current doc_hit_info_.
+ // current_iterators_ does not own the iterators it points to.
+ std::vector<DocHitInfoIterator *> current_iterators_;
};
} // namespace lib
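A minimal sketch (stand-in types) of the current_iterators_ bookkeeping declared above: on each Advance(), DocHitInfoIteratorOrNary records which children matched the chosen DocumentId, and PopulateMatchedTermsStats() then consults only those children.

#include <iostream>
#include <string>
#include <vector>

struct ChildSketch {
  std::string term;
  int current_doc_id;
  void PopulateMatchedTermsStats(std::vector<std::string>* stats) const {
    stats->push_back(term);  // real code reports per-section frequencies too
  }
};

int main() {
  std::vector<ChildSketch> children = {{"hi", 10}, {"hello", 10}, {"ciao", 9}};

  // Advance(): DocumentIds come back in decreasing order, so the next doc id
  // is the largest among the children, here 10. Remember who matched it.
  int next_document_id = 10;
  std::vector<const ChildSketch*> current_iterators;
  for (const ChildSketch& child : children) {
    if (child.current_doc_id == next_document_id) {
      current_iterators.push_back(&child);
    }
  }

  // PopulateMatchedTermsStats(): delegate only to the matching children.
  std::vector<std::string> matched_terms_stats;
  for (const ChildSketch* child : current_iterators) {
    child->PopulateMatchedTermsStats(&matched_terms_stats);
  }
  for (const std::string& term : matched_terms_stats) {
    std::cout << term << " ";  // prints "hi hello"
  }
  std::cout << "\n";
  return 0;
}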
diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
index 3faa5ab..d198b53 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
@@ -19,7 +19,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/section.h"
@@ -33,6 +32,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::IsEmpty;
TEST(CreateAndIteratorTest, Or) {
// Basic test that we can create a working Or iterator. Further testing of
@@ -73,38 +73,33 @@ TEST(DocHitInfoIteratorOrTest, Initialize) {
std::make_unique<DocHitInfoIteratorDummy>());
// We start out with invalid values
- EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone));
+ EXPECT_THAT(or_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
-TEST(DocHitInfoIteratorOrTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
+TEST(DocHitInfoIteratorOrTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
-
- DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
-
- EXPECT_THAT(or_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorOrTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
+ second_iter->SetCallStats(second_iter_call_stats);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
- EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves));
+ EXPECT_THAT(or_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats));
}
TEST(DocHitInfoIteratorOrTest, Advance) {
@@ -155,24 +150,200 @@ TEST(DocHitInfoIteratorOrTest, SectionIdMask) {
// Created to test correct section_id_mask behavior.
SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01010111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
ICING_EXPECT_OK(or_iter.Advance());
EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
+ {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(4);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+ doc_hit_info1.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/3);
+ doc_hit_info1.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+ SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{0, 1}, {2, 2}, {4, 3}, {6, 4}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(4);
+ doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+ doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+ SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{1, 2}, {2, 6}};
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {doc_hit_info2};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
+
+ DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(4));
+
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("hi", expected_section_ids_tf_map1),
+ EqualsTermMatchInfo("hello", expected_section_ids_tf_map2)));
+
+ EXPECT_FALSE(or_iter.Advance().ok());
+ }
+ {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(4);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+ SectionIdMask section_id_mask1 = 0b00000101; // hits in sections 0, 2
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{0, 1}, {2, 2}};
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {doc_hit_info1};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hi");
+ second_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(4));
+
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map1)));
+ EXPECT_FALSE(or_iter.Advance().ok());
+ }
+ {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(4);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+ doc_hit_info1.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/3);
+ doc_hit_info1.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+ SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{0, 1}, {2, 2}, {4, 3}, {6, 4}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(5);
+ doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+ doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+ SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{1, 2}, {2, 6}};
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {doc_hit_info2};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
+
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
+
+ DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(5));
+
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello",
+ expected_section_ids_tf_map2)));
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(4));
+
+ matched_terms_stats.clear();
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map1)));
+
+ EXPECT_FALSE(or_iter.Advance().ok());
+ }
+}
+
+TEST(DocHitInfoIteratorOrTest, TrimOrIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "term", 10);
+
+ DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(or_iter).TrimRightMostNode());
+ // The whole iterator is trimmed
+ ASSERT_TRUE(trimmed_node.iterator_ == nullptr);
+ ASSERT_THAT(trimmed_node.term_, Eq("term"));
+ ASSERT_THAT(trimmed_node.term_start_index_, Eq(10));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, TrimOrNaryIterator) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(0)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(1)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(2)};
+ std::vector<DocHitInfo> fourth_vector = {DocHitInfo(3)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(third_vector));
+ iterators.push_back(
+ std::make_unique<DocHitInfoIteratorDummy>(fourth_vector, "term", 10));
+ DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(or_iter).TrimRightMostNode());
+ // The whole iterator is trimmed
+ ASSERT_TRUE(trimmed_node.iterator_ == nullptr);
+ ASSERT_THAT(trimmed_node.term_, Eq("term"));
+ ASSERT_THAT(trimmed_node.term_start_index_, Eq(10));
}
TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
@@ -184,8 +355,8 @@ TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
DocHitInfoIteratorOrNary or_iter(std::move(iterators));
// We start out with invalid values
- EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone));
+ EXPECT_THAT(or_iter.doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>{}));
}
TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) {
@@ -198,51 +369,42 @@ TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(DocHitInfoIteratorOrNaryTest, GetNumBlocksInspected) {
- int first_iter_blocks = 4; // arbitrary value
- auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumBlocksInspected(first_iter_blocks);
-
- int second_iter_blocks = 7; // arbitrary value
- auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumBlocksInspected(second_iter_blocks);
-
- int third_iter_blocks = 13; // arbitrary value
- auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumBlocksInspected(third_iter_blocks);
-
- int fourth_iter_blocks = 1; // arbitrary value
- auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumBlocksInspected(fourth_iter_blocks);
-
- std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
- iterators.push_back(std::move(first_iter));
- iterators.push_back(std::move(second_iter));
- iterators.push_back(std::move(third_iter));
- iterators.push_back(std::move(fourth_iter));
- DocHitInfoIteratorOrNary or_iter(std::move(iterators));
-
- EXPECT_THAT(or_iter.GetNumBlocksInspected(),
- Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks +
- fourth_iter_blocks));
-}
-
-TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) {
- int first_iter_leaves = 4; // arbitrary value
+TEST(DocHitInfoIteratorOrNaryTest, GetCallStats) {
+ DocHitInfoIterator::CallStats first_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
- first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
-
- int second_iter_leaves = 7; // arbitrary value
+ first_iter->SetCallStats(first_iter_call_stats);
+
+ DocHitInfoIterator::CallStats second_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/6,
+ /*num_leaf_advance_calls_main_index_in=*/2,
+ /*num_leaf_advance_calls_integer_index_in=*/10,
+ /*num_leaf_advance_calls_no_index_in=*/3,
+ /*num_blocks_inspected_in=*/7); // arbitrary value
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
- second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
-
- int third_iter_leaves = 13; // arbitrary value
+ second_iter->SetCallStats(second_iter_call_stats);
+
+ DocHitInfoIterator::CallStats third_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/1000,
+ /*num_leaf_advance_calls_main_index_in=*/2000,
+ /*num_leaf_advance_calls_integer_index_in=*/3000,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/200); // arbitrary value
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
- third_iter->SetNumLeafAdvanceCalls(third_iter_leaves);
-
- int fourth_iter_leaves = 13; // arbitrary value
+ third_iter->SetCallStats(third_iter_call_stats);
+
+ DocHitInfoIterator::CallStats fourth_iter_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/200,
+ /*num_leaf_advance_calls_main_index_in=*/400,
+ /*num_leaf_advance_calls_integer_index_in=*/100,
+ /*num_leaf_advance_calls_no_index_in=*/20,
+ /*num_blocks_inspected_in=*/50); // arbitrary value
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
- fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves);
+ fourth_iter->SetCallStats(fourth_iter_call_stats);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -251,9 +413,9 @@ TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) {
iterators.push_back(std::move(fourth_iter));
DocHitInfoIteratorOrNary or_iter(std::move(iterators));
- EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(),
- Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves +
- fourth_iter_leaves));
+ EXPECT_THAT(or_iter.GetCallStats(),
+ Eq(first_iter_call_stats + second_iter_call_stats +
+ third_iter_call_stats + fourth_iter_call_stats));
}
TEST(DocHitInfoIteratorOrNaryTest, Advance) {
@@ -282,7 +444,6 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3
SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5
- SectionIdMask mask_anded_result = 0b00000100;
SectionIdMask mask_ored_result = 0b01101111;
std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
@@ -291,16 +452,16 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4, section_id_mask4)};
auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
- first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+ first_iter->set_hit_section_ids_mask(section_id_mask1);
auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
- second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+ second_iter->set_hit_section_ids_mask(section_id_mask2);
auto third_iter = std::make_unique<DocHitInfoIteratorDummy>(third_vector);
- third_iter->set_hit_intersect_section_ids_mask(section_id_mask3);
+ third_iter->set_hit_section_ids_mask(section_id_mask3);
auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>(fourth_vector);
- fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4);
+ fourth_iter->set_hit_section_ids_mask(section_id_mask4);
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
iterators.push_back(std::move(first_iter));
@@ -313,7 +474,108 @@ TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
ICING_EXPECT_OK(or_iter.Advance());
EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(),
Eq(mask_ored_result));
- EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, PopulateMatchedTermsStats) {
+ // Arbitrary section ids/term frequencies for the documents in the
+ // DocHitInfoIterators.
+ // For term "hi", document 10 and 8
+ DocHitInfoTermFrequencyPair doc_hit_info1_hi = DocHitInfo(10);
+ doc_hit_info1_hi.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+ doc_hit_info1_hi.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1_hi = {{0, 1}, {2, 2}, {6, 4}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2_hi = DocHitInfo(8);
+ doc_hit_info2_hi.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+ doc_hit_info2_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2_hi = {{1, 2}, {2, 6}};
+
+ // For term "hello", document 10 and 9
+ DocHitInfoTermFrequencyPair doc_hit_info1_hello = DocHitInfo(10);
+ doc_hit_info1_hello.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+ doc_hit_info1_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1_hello = {{0, 2}, {3, 3}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2_hello = DocHitInfo(9);
+ doc_hit_info2_hello.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/3);
+ doc_hit_info2_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/2);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2_hello = {{2, 3}, {3, 2}};
+
+ // For term "ciao", document 9 and 8
+ DocHitInfoTermFrequencyPair doc_hit_info1_ciao = DocHitInfo(9);
+ doc_hit_info1_ciao.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+ doc_hit_info1_ciao.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/3);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1_ciao = {{0, 2}, {1, 3}};
+
+ DocHitInfoTermFrequencyPair doc_hit_info2_ciao = DocHitInfo(8);
+ doc_hit_info2_ciao.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+ doc_hit_info2_ciao.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/2);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2_ciao = {{3, 3}, {4, 2}};
+
+ std::vector<DocHitInfoTermFrequencyPair> first_vector = {doc_hit_info1_hi,
+ doc_hit_info2_hi};
+ std::vector<DocHitInfoTermFrequencyPair> second_vector = {
+ doc_hit_info1_hello, doc_hit_info2_hello};
+ std::vector<DocHitInfoTermFrequencyPair> third_vector = {doc_hit_info1_ciao,
+ doc_hit_info2_ciao};
+
+ auto first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+ auto second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+ auto third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector, "ciao");
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::move(first_iter));
+ iterators.push_back(std::move(second_iter));
+ iterators.push_back(std::move(third_iter));
+
+ DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(10));
+
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("hi", expected_section_ids_tf_map1_hi),
+ EqualsTermMatchInfo("hello", expected_section_ids_tf_map1_hello)));
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(9));
+
+ matched_terms_stats.clear();
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("hello", expected_section_ids_tf_map2_hello),
+ EqualsTermMatchInfo("ciao", expected_section_ids_tf_map1_ciao)));
+
+ ICING_EXPECT_OK(or_iter.Advance());
+ EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(8));
+
+ matched_terms_stats.clear();
+ or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("hi", expected_section_ids_tf_map2_hi),
+ EqualsTermMatchInfo("ciao", expected_section_ids_tf_map2_ciao)));
+
+ EXPECT_FALSE(or_iter.Advance().ok());
}
} // namespace
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc
new file mode 100644
index 0000000..e6a1c67
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-document.cc
@@ -0,0 +1,65 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-document.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorPropertyInDocument::DocHitInfoIteratorPropertyInDocument(
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator)
+ : meta_hit_iterator_(std::move(meta_hit_iterator)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorPropertyInDocument::Advance() {
+ while (meta_hit_iterator_->Advance().ok()) {
+ // Currently, the metadata hits added by PropertyExistenceIndexingHandler
+ // can only have a section id of 0, so the section mask has to be 1 << 0.
+ if (meta_hit_iterator_->doc_hit_info().hit_section_ids_mask() == (1 << 0)) {
+ doc_hit_info_ = meta_hit_iterator_->doc_hit_info();
+ // Hits returned by "hasProperty" should not be associated with any
+ // section.
+ doc_hit_info_.set_hit_section_ids_mask(/*section_id_mask=*/0);
+ return libtextclassifier3::Status::OK;
+ }
+ }
+
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorPropertyInDocument::TrimRightMostNode() && {
+ // Don't generate a suggestion if the last operator is this custom function.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is hasProperty().");
+}
+
+std::string DocHitInfoIteratorPropertyInDocument::ToString() const {
+ return meta_hit_iterator_->ToString();
+}
+
+} // namespace lib
+} // namespace icing
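A small sketch of the post-processing in Advance() above: keep only delegate hits whose section mask is exactly 1 << 0 (the section id PropertyExistenceIndexingHandler currently assigns to metadata hits) and surface the survivors with an empty mask, since hasProperty() results carry no section association. The hit values are illustrative.

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // (document_id, hit_section_ids_mask) pairs from the delegate iterator.
  std::vector<std::pair<int, uint64_t>> meta_hits = {
      {7, 1 << 0}, {6, 1 << 2}, {5, 1 << 0}};

  for (auto& [document_id, mask] : meta_hits) {
    if (mask != (1 << 0)) {
      continue;  // not a recognized metadata hit
    }
    mask = 0;  // result carries no section association
    std::cout << "doc " << document_id << " mask " << mask << "\n";
  }
  // Prints "doc 7 mask 0" and "doc 5 mask 0"; document 6 is filtered out.
  return 0;
}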
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-document.h b/icing/index/iterator/doc-hit-info-iterator-property-in-document.h
new file mode 100644
index 0000000..bb2c97a
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-document.h
@@ -0,0 +1,73 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// The iterator returned by the "hasProperty" function in the advanced query
+// language. It post-processes metadata hits added by
+// PropertyExistenceIndexingHandler: specifically, it filters out hits that
+// are not recognized as metadata and always sets hit_section_ids_mask to 0.
+//
+// It is marked as a subclass of DocHitInfoLeafIterator because section
+// restrictions should not be passed down to meta_hit_iterator_.
+class DocHitInfoIteratorPropertyInDocument : public DocHitInfoLeafIterator {
+ public:
+ explicit DocHitInfoIteratorPropertyInDocument(
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator);
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ CallStats GetCallStats() const override {
+ return meta_hit_iterator_->GetCallStats();
+ }
+
+ std::string ToString() const override;
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ meta_hit_iterator_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_DOCUMENT_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
new file mode 100644
index 0000000..8b98302
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
@@ -0,0 +1,103 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections, int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ target_properties_(std::move(target_sections)),
+ current_time_ms_(current_time_ms) {}
+
+libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() {
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+
+ // Maps from SchemaTypeId to a bool indicating whether or not the type
+ // defines any of the requested properties.
+ std::unordered_map<SchemaTypeId, bool> property_defined_types;
+ while (delegate_->Advance().ok()) {
+ DocumentId document_id = delegate_->doc_hit_info().document_id();
+ auto data_optional = document_store_.GetAliveDocumentFilterData(
+ document_id, current_time_ms_);
+ if (!data_optional) {
+ // Ran into some error retrieving information on this hit, skip
+ continue;
+ }
+
+ // Guaranteed that the DocumentFilterData exists at this point
+ SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
+ bool valid_match = false;
+ auto itr = property_defined_types.find(schema_type_id);
+ if (itr != property_defined_types.end()) {
+ valid_match = itr->second;
+ } else {
+ for (const auto& property : target_properties_) {
+ if (schema_store_.IsPropertyDefinedInSchema(schema_type_id, property)) {
+ valid_match = true;
+ break;
+ }
+ }
+ property_defined_types[schema_type_id] = valid_match;
+ }
+
+ if (valid_match) {
+ doc_hit_info_ = delegate_->doc_hit_info();
+ return libtextclassifier3::Status::OK;
+ }
+
+ // The document's schema does not define any properties listed in
+ // target_properties_. Continue.
+ }
+
+ // Didn't find anything on the delegate iterator.
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && {
+ // Don't generate suggestion if the last operator is this custom function.
+ return absl_ports::InvalidArgumentError(
+ "Cannot generate suggestion if the last term is hasPropertyDefined().");
+}
+
+std::string DocHitInfoIteratorPropertyInSchema::ToString() const {
+ return absl_ports::StrCat("(", absl_ports::StrJoin(target_properties_, ","),
+ "): ", delegate_->ToString());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
new file mode 100644
index 0000000..c16a1c4
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
@@ -0,0 +1,80 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// An iterator that filters for DocHitInfos whose schema types define at
+// least one of the properties named in target_properties_.
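+// Backs the hasPropertyDefined() query function (see TrimRightMostNode).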
+class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+  // that outlive the one constructed. The delegate should at minimum be
+ // a DocHitInfoIteratorAllDocumentId, but other optimizations are possible,
+ // cf. go/icing-property-in-schema-existence.
+ explicit DocHitInfoIteratorPropertyInSchema(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections, int64_t current_time_ms);
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ void MapChildren(const ChildrenMapper& mapper) override {
+ delegate_ = mapper(std::move(delegate_));
+ }
+
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
+
+ std::string ToString() const override;
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ }
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> delegate_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+
+ std::set<std::string> target_properties_;
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
new file mode 100644
index 0000000..3f5a0a7
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
@@ -0,0 +1,269 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+class DocHitInfoIteratorPropertyInSchemaTest : public ::testing::Test {
+ protected:
+ DocHitInfoIteratorPropertyInSchemaTest()
+ : test_dir_(GetTestTempDir() + "/icing") {}
+
+ void SetUp() override {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ document1_ = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .Build();
+ document2_ =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("note").Build();
+
+ indexed_section_0 = "indexedSection0";
+ unindexed_section_1 = "unindexedSection1";
+ not_defined_section_2 = "notDefinedSection2";
+
+ schema_ =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_0)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(unindexed_section_1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("note").AddProperty(
+ PropertyConfigBuilder()
+ .SetName(unindexed_section_1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::string indexed_section_0;
+ std::string unindexed_section_1;
+ std::string not_defined_section_2;
+ SchemaProto schema_;
+ DocumentProto document1_;
+ DocumentProto document2_;
+ FakeClock fake_clock_;
+};
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ AdvanceToDocumentWithIndexedProperty) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ AdvanceToDocumentWithUnindexedProperty) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{unindexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest, NoMatchWithUndefinedProperty) {
+ ICING_EXPECT_OK(document_store_->Put(document1_));
+
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{not_defined_section_2},
+ fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ CorrectlySetsSectionIdMasksAndPopulatesTermMatchInfo) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_section_ids_mask(original_section_id_mask);
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(property_defined_iterator.Advance());
+ EXPECT_THAT(property_defined_iterator.doc_hit_info().document_id(),
+ Eq(document_id));
+
+ // The expected mask is the same as the original mask, since the iterator
+ // should treat it as a pass-through.
+ SectionIdMask expected_section_id_mask = original_section_id_mask;
+ EXPECT_EQ(property_defined_iterator.doc_hit_info().hit_section_ids_mask(),
+ expected_section_id_mask);
+
+ property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{0, 1}, {2, 2}};
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map)));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ TrimRightMostNodeResultsInError) {
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(std::move(property_defined_iterator).TrimRightMostNode(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+ FindPropertyDefinedByMultipleTypes) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+ auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_->num_documents());
+
+ DocHitInfoIteratorPropertyInSchema property_defined_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{unindexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(&property_defined_iterator),
+ ElementsAre(document_id2, document_id1));
+
+ EXPECT_FALSE(property_defined_iterator.Advance().ok());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 8acb91a..35dc0b9 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -16,98 +16,231 @@
#include <cstdint>
#include <memory>
+#include <set>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/iterator/section-restrict-data.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
- const DocumentStore* document_store, const SchemaStore* schema_store,
- std::string_view target_section)
- : delegate_(std::move(delegate)),
- document_store_(*document_store),
- schema_store_(*schema_store),
- target_section_(target_section) {}
+// An iterator that simply takes ownership of SectionRestrictData.
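+// ApplyRestrictions() shares one SectionRestrictData instance across all leaf
+// iterators in the tree; parking that instance here, at the root, keeps it
+// alive exactly as long as the tree itself.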
+class SectionRestrictDataHolderIterator : public DocHitInfoIterator {
+ public:
+ explicit SectionRestrictDataHolderIterator(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ std::unique_ptr<SectionRestrictData> data)
+ : delegate_(std::move(delegate)), data_(std::move(data)) {}
+
+ libtextclassifier3::Status Advance() override {
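+    // Pure pass-through: mirror the delegate's current hit and status.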
+ auto result = delegate_->Advance();
+ doc_hit_info_ = delegate_->doc_hit_info();
+ return result;
+ }
-libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
- if (!delegate_->Advance().ok()) {
- // Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
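+    // Trim the delegate, then re-wrap the trimmed tree so that data_ keeps
+    // living with it.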
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ if (trimmed_delegate.iterator_ != nullptr) {
+ trimmed_delegate.iterator_ =
+ std::make_unique<SectionRestrictDataHolderIterator>(
+ std::move(trimmed_delegate.iterator_), std::move(data_));
+ }
+ return trimmed_delegate;
+ }
+
+ void MapChildren(const ChildrenMapper& mapper) override {
+ delegate_ = mapper(std::move(delegate_));
}
- DocumentId document_id = delegate_->doc_hit_info().document_id();
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
- SectionIdMask section_id_mask =
- delegate_->doc_hit_info().hit_section_ids_mask();
+ std::string ToString() const override { return delegate_->ToString(); }
- auto data_or = document_store_.GetDocumentFilterData(document_id);
- if (!data_or.ok()) {
- // Ran into some error retrieving information on this hit, skip
- return Advance();
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask) const override {
+ return delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
- // Guaranteed that the DocumentFilterData exists at this point
- DocumentFilterData data = std::move(data_or).ValueOrDie();
- SchemaTypeId schema_type_id = data.schema_type_id();
-
- // A hit can be in multiple sections at once, need to check that at least one
- // of the confirmed section ids match the name of the target section
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctz(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
-
- if (section_metadata->path == target_section_) {
- // The hit was in the target section name, return OK/found
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- delegate_->hit_intersect_section_ids_mask();
- return libtextclassifier3::Status::OK;
- }
- }
+ private:
+ std::unique_ptr<DocHitInfoIterator> delegate_;
+ std::unique_ptr<SectionRestrictData> data_;
+};
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data)
+ : delegate_(std::move(delegate)), data_(data) {}
- // Mark this section as checked
- section_id_mask &= ~(1U << section_id);
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections, int64_t current_time_ms) {
+ std::unordered_map<std::string, std::set<std::string>> type_property_filters;
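+  // A single wildcard entry applies the same target sections to every schema
+  // type.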
+ type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] =
+ std::move(target_sections);
+ auto data = std::make_unique<SectionRestrictData>(
+ document_store, schema_store, current_time_ms, type_property_filters);
+ std::unique_ptr<DocHitInfoIterator> result =
+ ApplyRestrictions(std::move(iterator), data.get());
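+  // Wrap the tree in a holder so that data stays alive as long as the tree.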
+ return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
+ std::move(data));
+}
+
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec, int64_t current_time_ms) {
+ std::unordered_map<std::string, std::set<std::string>> type_property_filters;
+ // TODO(b/294274922): Add support for polymorphism in type property filters.
+ for (const TypePropertyMask& type_property_mask :
+ search_spec.type_property_filters()) {
+ type_property_filters[type_property_mask.schema_type()] =
+ std::set<std::string>(type_property_mask.paths().begin(),
+ type_property_mask.paths().end());
}
+ auto data = std::make_unique<SectionRestrictData>(
+ document_store, schema_store, current_time_ms, type_property_filters);
+ std::unique_ptr<DocHitInfoIterator> result =
+ ApplyRestrictions(std::move(iterator), data.get());
+ return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
+ std::move(data));
+}
- // Didn't find a matching section name for this hit, go to the next hit
- return Advance();
+std::unique_ptr<DocHitInfoIterator>
+DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) {
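+  // Recursively rewrite the tree: each leaf iterator is wrapped in a
+  // DocHitInfoIteratorSectionRestrict, while inner nodes only remap their
+  // children.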
+ ChildrenMapper mapper;
+ mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator)
+ -> std::unique_ptr<DocHitInfoIterator> {
+ if (iterator->is_leaf()) {
+ return std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(iterator), data);
+ } else {
+ iterator->MapChildren(mapper);
+ return iterator;
+ }
+ };
+ return mapper(std::move(iterator));
}
-int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
- return delegate_->GetNumBlocksInspected();
+libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ while (delegate_->Advance().ok()) {
+ DocumentId document_id = delegate_->doc_hit_info().document_id();
+
+ auto data_optional = data_->document_store().GetAliveDocumentFilterData(
+ document_id, data_->current_time_ms());
+ if (!data_optional) {
+      // The document is no longer alive (deleted or expired); skip this hit.
+ continue;
+ }
+
+ // Guaranteed that the DocumentFilterData exists at this point
+ SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
+ auto schema_type_or = data_->schema_store().GetSchemaType(schema_type_id);
+ if (!schema_type_or.ok()) {
+ // Ran into error retrieving schema type, skip
+ continue;
+ }
+ const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
+ SectionIdMask allowed_sections_mask =
+ data_->ComputeAllowedSectionsMask(*schema_type);
+
+ // A hit can be in multiple sections at once, need to check which of the
+ // section ids match the sections allowed by type_property_masks_. This can
+ // be done by doing a bitwise and of the section_id_mask in the doc hit and
+ // the allowed_sections_mask.
+ SectionIdMask section_id_mask =
+ delegate_->doc_hit_info().hit_section_ids_mask() &
+ allowed_sections_mask;
+
+ // Return this document if:
+ // - the sectionIdMask is not empty after applying property filters, or
+ // - no property filters apply for its schema type (allowed_sections_mask
+ // == kSectionIdMaskAll). This is needed to ensure that in case of empty
+ // query (which uses doc-hit-info-iterator-all-document-id), where
+ // section_id_mask is kSectionIdMaskNone, doc hits with no property
+    //   restrictions don't get filtered out. Doc hits for schema types for
+    //   which property filters are specified will still get filtered out.
+ if (allowed_sections_mask == kSectionIdMaskAll ||
+ section_id_mask != kSectionIdMaskNone) {
+ doc_hit_info_ = delegate_->doc_hit_info();
+ doc_hit_info_.set_hit_section_ids_mask(section_id_mask);
+ return libtextclassifier3::Status::OK;
+ }
+ // Didn't find a matching section name for this hit. Continue.
+ }
+
+ // Didn't find anything on the delegate iterator.
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
-int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
- return delegate_->GetNumLeafAdvanceCalls();
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
+ ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
+ std::move(*delegate_).TrimRightMostNode());
+ // TrimRightMostNode is only used by suggestion processor to process query
+ // expression, so an entry for wildcard should always be present in
+ // type_property_filters_ when code flow reaches here. If the InternalError
+ // below is returned, that means TrimRightMostNode hasn't been called in the
+ // right context.
+ const auto it = data_->type_property_filters().find("*");
+ if (it == data_->type_property_filters().end()) {
+ return absl_ports::InternalError(
+ "A wildcard entry should always be present in type property filters "
+ "whenever TrimRightMostNode() is called for "
+ "DocHitInfoIteratorSectionRestrict");
+ }
+ const std::set<std::string>& target_sections = it->second;
+ if (target_sections.empty()) {
+ return absl_ports::InternalError(
+ "Target sections should not be empty whenever TrimRightMostNode() is "
+ "called for DocHitInfoIteratorSectionRestrict");
+ }
+ if (trimmed_delegate.iterator_ == nullptr) {
+ // TODO(b/228240987): Update TrimmedNode and downstream code to handle
+ // multiple section restricts.
+ trimmed_delegate.target_section_ = std::move(*target_sections.begin());
+ return trimmed_delegate;
+ }
+ trimmed_delegate.iterator_ =
+ std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
+ new DocHitInfoIteratorSectionRestrict(
+ std::move(trimmed_delegate.iterator_), std::move(data_)));
+ return std::move(trimmed_delegate);
}
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
- return absl_ports::StrCat(target_section_, ": ", delegate_->ToString());
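+  // Renders as "{type1:path1,path2; type2:...}: <delegate string>".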
+ std::string output = "";
+ for (auto it = data_->type_property_filters().cbegin();
+ it != data_->type_property_filters().cend(); it++) {
+ std::string paths = absl_ports::StrJoin(it->second, ",");
+ output += (it->first) + ":" + (paths) + "; ";
+ }
+ std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
+ return absl_ports::StrCat(result, delegate_->ToString());
}
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index ae5a896..387ff52 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -17,12 +17,18 @@
#include <cstdint>
#include <memory>
+#include <set>
#include <string>
-#include <string_view>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/iterator/section-restrict-data.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
namespace icing {
@@ -35,30 +41,65 @@ namespace lib {
// That class is meant to be applied to the root of a query tree and filter over
// all results at the end. This class is more used in the limited scope of a
// term or a small group of terms.
-class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
+class DocHitInfoIteratorSectionRestrict : public DocHitInfoLeafIterator {
public:
// Does not take any ownership, and all pointers must refer to valid objects
// that outlive the one constructed.
explicit DocHitInfoIteratorSectionRestrict(
- std::unique_ptr<DocHitInfoIterator> delegate,
+ std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data);
+
+ // Methods that apply section restrictions to all DocHitInfoLeafIterator nodes
+ // inside the provided iterator tree, and return the root of the tree
+ // afterwards. These methods do not take any ownership for the raw pointer
+ // parameters, which must refer to valid objects that outlive the iterator
+ // returned.
+ static std::unique_ptr<DocHitInfoIterator> ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::set<std::string> target_sections, int64_t current_time_ms);
+ static std::unique_ptr<DocHitInfoIterator> ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator,
const DocumentStore* document_store, const SchemaStore* schema_store,
- std::string_view target_section);
+ const SearchSpecProto& search_spec, int64_t current_time_ms);
+ static std::unique_ptr<DocHitInfoIterator> ApplyRestrictions(
+ std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data);
libtextclassifier3::Status Advance() override;
- int32_t GetNumBlocksInspected() const override;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumLeafAdvanceCalls() const override;
+ CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
std::string ToString() const override;
+ // Note that the DocHitInfoIteratorSectionRestrict can only be applied at
+ // DocHitInfoLeafIterator, which can be a term iterator or another
+ // DocHitInfoIteratorSectionRestrict.
+ //
+ // To filter the matching sections, filtering_section_mask should be set to
+ // doc_hit_info_.hit_section_ids_mask() held in the outermost
+ // DocHitInfoIteratorSectionRestrict, which is equal to the intersection of
+ // all hit_section_ids_mask in the DocHitInfoIteratorSectionRestrict chain,
+ // since for any two section restrict iterators chained together, the outer
+ // one's hit_section_ids_mask is always a subset of the inner one's
+ // hit_section_ids_mask.
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(
+ matched_terms_stats,
+ /*filtering_section_mask=*/filtering_section_mask &
+ doc_hit_info_.hit_section_ids_mask());
+ }
+
private:
std::unique_ptr<DocHitInfoIterator> delegate_;
- const DocumentStore& document_store_;
- const SchemaStore& schema_store_;
-
- // Ensure that this does not outlive the underlying string value.
- std::string_view target_section_;
+ // Does not own.
+ SectionRestrictData* data_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index df79c6d..ee65fe1 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -15,6 +15,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include <memory>
+#include <set>
#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -24,11 +25,13 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -46,6 +49,9 @@ using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+constexpr SectionId kIndexedSectionId0 = 0;
+constexpr SectionId kIndexedSectionId1 = 1;
+
class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
protected:
DocHitInfoIteratorSectionRestrictTest()
@@ -53,33 +59,58 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document_ =
- DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build();
-
- auto type_config = schema_.add_types();
- type_config->set_schema_type("email");
-
- // Add an indexed property so we generate section metadata on it
- auto property = type_config->add_properties();
- property->set_property_name(indexed_property_);
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ document1_ = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .Build();
+ document2_ = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("email")
+ .Build();
+ document3_ = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("email")
+ .Build();
+
+ indexed_section_0 = "indexedSection0";
+ indexed_section_1 = "indexedSection1";
+ schema_ =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_0)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(indexed_section_1)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- // First and only indexed property, so it gets the first id of 0
- indexed_section_id_ = 0;
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -92,30 +123,87 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
std::unique_ptr<DocumentStore> document_store_;
const Filesystem filesystem_;
const std::string test_dir_;
+ std::string indexed_section_0;
+ std::string indexed_section_1;
SchemaProto schema_;
- DocumentProto document_;
- const std::string indexed_property_ = "subject";
- int indexed_section_id_;
+ DocumentProto document1_;
+ DocumentProto document2_;
+ DocumentProto document3_;
FakeClock fake_clock_;
};
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ PopulateMatchedTermsStats_IncludesHitWithMatchingSection) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_section_ids_mask(original_section_id_mask);
+
+ // Filtering for the indexed section name (which has a section id of 0) should
+ // get a result.
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(section_restrict_iterator->Advance());
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info().document_id(),
+ Eq(document_id));
+  SectionIdMask expected_section_id_mask = 0b00000001; // hit in section 0
+ EXPECT_EQ(section_restrict_iterator->doc_hit_info().hit_section_ids_mask(),
+ expected_section_id_mask);
+
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{0, 1}};
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hi", expected_section_ids_tf_map)));
+
+ EXPECT_FALSE(section_restrict_iterator->Advance().ok());
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
- DocHitInfoIteratorSectionRestrict filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), /*target_section=*/"");
+ std::unique_ptr<DocHitInfoIterator> filtered_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator_empty), document_store_.get(),
+ schema_store_.get(), /*target_sections=*/std::set<std::string>(),
+ fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(filtered_iterator.get()), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ filtered_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
- SectionIdMask section_id_mask = 1U << indexed_section_id_;
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
// Create a hit that was found in the indexed section
std::vector<DocHitInfo> doc_hit_infos = {
@@ -125,14 +213,107 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filtering for the indexed section name should get a result
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator),
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()),
ElementsAre(document_id));
}
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithMultipleMatchingSectionsWithMultipleSectionRestricts) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
+ section_id_mask |= 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator->Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId0,
+ kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithMultipleMatchingSectionsWithSingleSectionRestrict) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
+ section_id_mask |= 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Filter for only one of the two indexed sections
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator->Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ IncludesHitWithSingleMatchingSectionsWithMultiSectionRestrict) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document1_));
+
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId1;
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id, section_id_mask)};
+
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Filter for both target_sections
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0, indexed_section_1},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ ICING_ASSERT_OK(section_restrict_iterator->Advance());
+ std::vector<SectionId> expected_section_ids = {kIndexedSectionId1};
+ EXPECT_THAT(section_restrict_iterator->doc_hit_info(),
+ EqualsDocHitInfo(document_id, expected_section_ids));
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
// Create a hit with a document id that doesn't exist in the DocumentStore yet
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
@@ -141,20 +322,25 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filtering for the indexed section name should get a result
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithWrongSectionName) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
- SectionIdMask section_id_mask = 1U << indexed_section_id_;
+ SectionIdMask section_id_mask = 1U << kIndexedSectionId0;
// Create a hit that was found in the indexed section
std::vector<DocHitInfo> doc_hit_infos = {
@@ -164,18 +350,24 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// Filtering for the indexed section name should get a result
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- "some_section_name");
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{"some_section_name"},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithNoSectionIds) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Create a hit that doesn't exist in any sections, so it shouldn't match any
// section filters
@@ -185,18 +377,24 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
DoesntIncludeHitWithDifferentSectionId) {
// Populate the DocumentStore's FilterCache with this document's data
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(document_));
+ document_store_->Put(document1_));
// Anything that's not 0, which is the indexed property
SectionId not_matching_section_id = 2;
@@ -204,38 +402,140 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Create a hit that exists in a different section, so it shouldn't match any
// section filters
std::vector<DocHitInfo> doc_hit_infos = {
- DocHitInfo(document_id, kSectionIdMaskNone << not_matching_section_id)};
+ DocHitInfo(document_id, UINT64_C(1) << not_matching_section_id)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- indexed_property_);
-
- EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ EXPECT_THAT(GetDocumentIds(section_restrict_iterator.get()), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
-TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
+TEST_F(DocHitInfoIteratorSectionRestrictTest, GetCallStats) {
+ DocHitInfoIterator::CallStats original_call_stats(
+ /*num_leaf_advance_calls_lite_index_in=*/2,
+ /*num_leaf_advance_calls_main_index_in=*/5,
+ /*num_leaf_advance_calls_integer_index_in=*/3,
+ /*num_leaf_advance_calls_no_index_in=*/1,
+ /*num_blocks_inspected_in=*/4); // arbitrary value
auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumBlocksInspected(5);
+ original_iterator->SetCallStats(original_call_stats);
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(),
+ /*target_sections=*/{""}, fake_clock_.GetSystemTimeMilliseconds());
- EXPECT_THAT(section_restrict_iterator.GetNumBlocksInspected(), Eq(5));
+ EXPECT_THAT(section_restrict_iterator->GetCallStats(),
+ Eq(original_call_stats));
}
-TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumLeafAdvanceCalls) {
- auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
- original_iterator->SetNumLeafAdvanceCalls(6);
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ TrimSectionRestrictIterator_TwoLayer) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3_));
+
+ // 0 is the indexed property
+ SectionId matching_section_id = 0;
+ // Anything that's not 0, which is the indexed property
+ SectionId not_matching_section_id = 2;
- DocHitInfoIteratorSectionRestrict section_restrict_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- /*target_section=*/"");
+ // Build an iterator tree like:
+ // AND
+ // / \
+ // [1, 1],[2, 2] [3, 2]
+ std::vector<DocHitInfo> left_infos = {
+ DocHitInfo(document_id1, 1U << matching_section_id),
+ DocHitInfo(document_id2, 1U << not_matching_section_id)};
+ std::vector<DocHitInfo> right_infos = {
+ DocHitInfo(document_id3, 1U << not_matching_section_id)};
+
+ std::unique_ptr<DocHitInfoIterator> left_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(left_infos);
+ std::unique_ptr<DocHitInfoIterator> right_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(right_infos, "term", 10);
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(left_iterator),
+ std::move(right_iterator));
+
+ // After applying section restriction:
+ // AND
+ // / \
+ // Restrict Restrict
+ // | |
+ // [1, 1],[2, 2] [3, 2]
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(), {indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ // The trimmed tree.
+ // Restrict
+ // |
+ // [1, 1],[2, 2]
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocHitInfoIterator::TrimmedNode node,
+ std::move(*section_restrict_iterator).TrimRightMostNode());
+
+ EXPECT_THAT(GetDocumentIds(node.iterator_.get()), ElementsAre(document_id1));
+ EXPECT_THAT(node.term_, Eq("term"));
+ EXPECT_THAT(node.term_start_index_, Eq(10));
+ EXPECT_THAT(node.target_section_, Eq(indexed_section_0));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, TrimSectionRestrictIterator) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_));
- EXPECT_THAT(section_restrict_iterator.GetNumLeafAdvanceCalls(), Eq(6));
+ // 0 is the indexed property
+ SectionId matching_section_id = 0;
+ // Anything that's not 0, which is the indexed property
+ SectionId not_matching_section_id = 2;
+
+  // Build an iterator tree like:
+ // Restrict
+ // |
+ // [1, 1],[2, 2]
+ std::vector<DocHitInfo> doc_infos = {
+ DocHitInfo(document_id1, 1U << matching_section_id),
+ DocHitInfo(document_id2, 1U << not_matching_section_id)};
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_infos, "term", 10);
+
+ std::unique_ptr<DocHitInfoIterator> section_restrict_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(original_iterator), document_store_.get(),
+ schema_store_.get(), {indexed_section_0},
+ fake_clock_.GetSystemTimeMilliseconds());
+
+ // The trimmed tree has null iterator but has target section.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocHitInfoIterator::TrimmedNode node,
+ std::move(*section_restrict_iterator).TrimRightMostNode());
+
+ EXPECT_THAT(node.iterator_, testing::IsNull());
+ EXPECT_THAT(node.term_, Eq("term"));
+ EXPECT_THAT(node.term_start_index_, Eq(10));
+ EXPECT_THAT(node.target_section_, Eq(indexed_section_0));
}
} // namespace
diff --git a/icing/index/iterator/doc-hit-info-iterator-term.cc b/icing/index/iterator/doc-hit-info-iterator-term.cc
deleted file mode 100644
index 97ca3c4..0000000
--- a/icing/index/iterator/doc-hit-info-iterator-term.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/iterator/doc-hit-info-iterator-term.h"
-
-#include <cstdint>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/index/hit/doc-hit-info.h"
-#include "icing/schema/section.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-
-std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
- std::string mask(kMaxSectionId + 1, '0');
- for (SectionId i = kMaxSectionId; i >= 0; --i) {
- if (section_id_mask & (1U << i)) {
- mask[kMaxSectionId - i] = '1';
- }
- }
- return mask;
-}
-
-} // namespace
-
-libtextclassifier3::Status DocHitInfoIteratorTerm::Advance() {
- if (cached_hits_idx_ == -1) {
- ICING_RETURN_IF_ERROR(RetrieveMoreHits());
- } else {
- ++cached_hits_idx_;
- }
- if (cached_hits_idx_ == -1 || cached_hits_idx_ >= cached_hits_.size()) {
- // Nothing more for the iterator to return. Set these members to invalid
- // values.
- doc_hit_info_ = DocHitInfo();
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
- doc_hit_info_ = cached_hits_.at(cached_hits_idx_);
- hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status DocHitInfoIteratorTermExact::RetrieveMoreHits() {
- // Exact match only. All hits in lite lexicon are exact.
- ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->FindTerm(term_));
- ICING_ASSIGN_OR_RETURN(uint32_t term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- lite_index_->AppendHits(term_id, section_restrict_mask_,
- /*only_from_prefix_sections=*/false, &cached_hits_);
- cached_hits_idx_ = 0;
- return libtextclassifier3::Status::OK;
-}
-
-std::string DocHitInfoIteratorTermExact::ToString() const {
- return absl_ports::StrCat(SectionIdMaskToString(section_restrict_mask_), ":",
- term_);
-}
-
-libtextclassifier3::Status DocHitInfoIteratorTermPrefix::RetrieveMoreHits() {
- // Take union of lite terms.
- int term_len = term_.length();
- int terms_matched = 0;
- for (LiteIndex::PrefixIterator it = lite_index_->FindTermPrefixes(term_);
- it.IsValid(); it.Advance()) {
- bool exact_match = strlen(it.GetKey()) == term_len;
- ICING_ASSIGN_OR_RETURN(
- uint32_t term_id,
- term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
- lite_index_->AppendHits(term_id, section_restrict_mask_,
- /*only_from_prefix_sections=*/!exact_match,
- &cached_hits_);
- ++terms_matched;
- }
- if (terms_matched > 1) {
- SortAndDedupeDocumentIds();
- }
- cached_hits_idx_ = 0;
- return libtextclassifier3::Status::OK;
-}
-
-void DocHitInfoIteratorTermPrefix::SortAndDedupeDocumentIds() {
- // Re-sort cached document_ids and merge sections.
- sort(cached_hits_.begin(), cached_hits_.end());
-
- int idx = 0;
- for (int i = 1; i < cached_hits_.size(); ++i) {
- const DocHitInfo& hit_info = cached_hits_.at(i);
- DocHitInfo& collapsed_hit_info = cached_hits_.at(idx);
- if (collapsed_hit_info.document_id() == hit_info.document_id()) {
- collapsed_hit_info.MergeSectionsFrom(hit_info);
- } else {
- // New document_id.
- cached_hits_.at(++idx) = hit_info;
- }
- }
- // idx points to last doc hit info.
- cached_hits_.resize(idx + 1);
-}
-
-std::string DocHitInfoIteratorTermPrefix::ToString() const {
- return absl_ports::StrCat(SectionIdMaskToString(section_restrict_mask_), ":",
- term_, "*");
-}
-
-} // namespace lib
-} // namespace icing
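
The SortAndDedupeDocumentIds() routine deleted above is the classic sort-then-collapse pattern: sort hits by document id, then merge adjacent entries that share an id. A minimal standalone sketch of the same pattern, using a simplified hit type and a plain uint64_t section mask instead of the icing types:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct SimpleHit {
      int32_t document_id;
      uint64_t section_mask;
      bool operator<(const SimpleHit& other) const {
        return document_id < other.document_id;
      }
    };

    // Sorts hits by document id, then collapses runs sharing a document id by
    // OR-ing their section masks together in place.
    void SortAndDedupe(std::vector<SimpleHit>& hits) {
      if (hits.empty()) return;
      std::sort(hits.begin(), hits.end());
      size_t idx = 0;
      for (size_t i = 1; i < hits.size(); ++i) {
        if (hits[idx].document_id == hits[i].document_id) {
          hits[idx].section_mask |= hits[i].section_mask;  // Merge sections.
        } else {
          hits[++idx] = hits[i];  // New document id; keep it.
        }
      }
      hits.resize(idx + 1);  // idx points at the last collapsed hit.
    }

The in-place collapse keeps the pass O(n log n) overall and allocation-free, which is why the original code resizes the vector rather than building a second one.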
diff --git a/icing/index/iterator/doc-hit-info-iterator-term.h b/icing/index/iterator/doc-hit-info-iterator-term.h
deleted file mode 100644
index 7d02fc2..0000000
--- a/icing/index/iterator/doc-hit-info-iterator-term.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_H_
-#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_H_
-
-#include <cstdint>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/index/hit/doc-hit-info.h"
-#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/lite-index.h"
-#include "icing/index/term-id-codec.h"
-#include "icing/schema/section.h"
-
-namespace icing {
-namespace lib {
-
-class DocHitInfoIteratorTerm : public DocHitInfoIterator {
- public:
- explicit DocHitInfoIteratorTerm(const TermIdCodec* term_id_codec,
- LiteIndex* lite_index, const std::string term,
- SectionIdMask section_restrict_mask)
- : term_(term),
- lite_index_(lite_index),
- cached_hits_idx_(-1),
- term_id_codec_(term_id_codec),
- num_advance_calls_(0),
- section_restrict_mask_(section_restrict_mask) {}
-
- libtextclassifier3::Status Advance() override;
-
- int32_t GetNumBlocksInspected() const override {
- // TODO(b/137862424): Implement this once the main index is added.
- return 0;
- }
- int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
-
- protected:
- // Add DocHitInfos corresponding to term_ to cached_hits_.
- virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
-
- const std::string term_;
- LiteIndex* const lite_index_;
- // Stores hits retrieved from the index. This may only be a subset of the hits
- // that are present in the index. Current value pointed to by the Iterator is
- // tracked by cached_hits_idx_.
- std::vector<DocHitInfo> cached_hits_;
- int cached_hits_idx_;
- const TermIdCodec* term_id_codec_;
- int num_advance_calls_;
- // Mask indicating which sections hits should be considered for.
- // Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired.
- const SectionIdMask section_restrict_mask_;
-};
-
-class DocHitInfoIteratorTermExact : public DocHitInfoIteratorTerm {
- public:
- explicit DocHitInfoIteratorTermExact(const TermIdCodec* term_id_codec,
- LiteIndex* lite_index,
- const std::string& term,
- SectionIdMask section_id_mask)
- : DocHitInfoIteratorTerm(term_id_codec, lite_index, term,
- section_id_mask) {}
-
- std::string ToString() const override;
-
- protected:
- libtextclassifier3::Status RetrieveMoreHits() override;
-};
-
-class DocHitInfoIteratorTermPrefix : public DocHitInfoIteratorTerm {
- public:
- explicit DocHitInfoIteratorTermPrefix(const TermIdCodec* term_id_codec,
- LiteIndex* lite_index,
- const std::string& term,
- SectionIdMask section_id_mask)
- : DocHitInfoIteratorTerm(term_id_codec, lite_index, term,
- section_id_mask) {}
-
- std::string ToString() const override;
-
- protected:
- libtextclassifier3::Status RetrieveMoreHits() override;
-
- private:
-  // After retrieving DocHitInfos from the index, there may be a DocHitInfo
-  // for docid 1 and "foo" and a DocHitInfo for docid 1 and "fool". These
-  // DocHitInfos should be merged.
- void SortAndDedupeDocumentIds();
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index c4d7aa7..c75fb33 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -15,7 +15,7 @@
#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
-#include <cstdint>
+#include <cinttypes>
#include <string>
#include <utility>
#include <vector>
@@ -32,21 +32,69 @@
namespace icing {
namespace lib {
+class DocHitInfoTermFrequencyPair {
+ public:
+ DocHitInfoTermFrequencyPair(
+ const DocHitInfo& doc_hit_info,
+ const Hit::TermFrequencyArray& hit_term_frequency = {})
+ : doc_hit_info_(doc_hit_info), hit_term_frequency_(hit_term_frequency) {}
+
+ void UpdateSection(SectionId section_id,
+ Hit::TermFrequency hit_term_frequency) {
+ doc_hit_info_.UpdateSection(section_id);
+ hit_term_frequency_[section_id] = hit_term_frequency;
+ }
+
+ void MergeSectionsFrom(const DocHitInfoTermFrequencyPair& other) {
+ SectionIdMask other_mask = other.doc_hit_info_.hit_section_ids_mask();
+ doc_hit_info_.MergeSectionsFrom(other_mask);
+ while (other_mask) {
+ SectionId section_id = __builtin_ctzll(other_mask);
+ hit_term_frequency_[section_id] = other.hit_term_frequency_[section_id];
+ other_mask &= ~(UINT64_C(1) << section_id);
+ }
+ }
+
+ DocHitInfo doc_hit_info() const { return doc_hit_info_; }
+
+ Hit::TermFrequency hit_term_frequency(SectionId section_id) const {
+ return hit_term_frequency_[section_id];
+ }
+
+ private:
+ DocHitInfo doc_hit_info_;
+ Hit::TermFrequencyArray hit_term_frequency_;
+};
+
 // Dummy class to help with testing. It starts with a kInvalidDocumentId doc
 // hit info until Advance() is called (like normal DocHitInfoIterators). It
 // will then proceed to return the doc_hit_infos in order as Advance() is
 // called. After all doc_hit_infos are returned, Advance() will return a
 // RESOURCE_EXHAUSTED error (also like normal DocHitInfoIterators).
-class DocHitInfoIteratorDummy : public DocHitInfoIterator {
+class DocHitInfoIteratorDummy : public DocHitInfoLeafIterator {
public:
DocHitInfoIteratorDummy() = default;
- explicit DocHitInfoIteratorDummy(std::vector<DocHitInfo> doc_hit_infos)
- : doc_hit_infos_(std::move(doc_hit_infos)) {}
+ explicit DocHitInfoIteratorDummy(
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos,
+ std::string term = "")
+ : doc_hit_infos_(std::move(doc_hit_infos)), term_(std::move(term)) {}
+
+ explicit DocHitInfoIteratorDummy(const std::vector<DocHitInfo>& doc_hit_infos,
+ std::string term = "",
+ int term_start_index = 0,
+ int unnormalized_term_length = 0)
+ : term_(std::move(term)),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length) {
+ for (auto& doc_hit_info : doc_hit_infos) {
+ doc_hit_infos_.push_back(DocHitInfoTermFrequencyPair(doc_hit_info));
+ }
+ }
libtextclassifier3::Status Advance() override {
+ ++index_;
if (index_ < doc_hit_infos_.size()) {
- doc_hit_info_ = doc_hit_infos_.at(index_);
- index_++;
+ doc_hit_info_ = doc_hit_infos_.at(index_).doc_hit_info();
return libtextclassifier3::Status::OK;
}
@@ -54,43 +102,74 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
"No more DocHitInfos in iterator");
}
- void set_hit_intersect_section_ids_mask(
- SectionIdMask hit_intersect_section_ids_mask) {
- hit_intersect_section_ids_mask_ = hit_intersect_section_ids_mask;
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
}
- int32_t GetNumBlocksInspected() const override {
- return num_blocks_inspected_;
+ // Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (index_ == -1 || index_ >= doc_hit_infos_.size()) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
+ std::array<Hit::TermFrequency, kTotalNumSections> section_term_frequencies =
+ {Hit::kNoTermFrequency};
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctzll(section_mask_copy);
+ section_term_frequencies.at(section_id) =
+ doc_hit_infos_.at(index_).hit_term_frequency(section_id);
+ section_mask_copy &= ~(UINT64_C(1) << section_id);
+ }
+ TermMatchInfo term_stats(term_, section_mask,
+ std::move(section_term_frequencies));
+
+ for (auto& cur_term_stats : *matched_terms_stats) {
+ if (cur_term_stats.term == term_stats.term) {
+        // Same docId and same term: we don't need to add the term again, and
+        // the term frequency should always be the same.
+ return;
+ }
+ }
+ matched_terms_stats->push_back(term_stats);
}
- void SetNumBlocksInspected(int32_t num_blocks_inspected) {
- num_blocks_inspected_ = num_blocks_inspected;
+ void set_hit_section_ids_mask(SectionIdMask hit_section_ids_mask) {
+ doc_hit_info_.set_hit_section_ids_mask(hit_section_ids_mask);
}
- int32_t GetNumLeafAdvanceCalls() const override {
- return num_leaf_advance_calls_;
- }
+ CallStats GetCallStats() const override { return call_stats_; }
- void SetNumLeafAdvanceCalls(int32_t num_leaf_advance_calls) {
- num_leaf_advance_calls_ = num_leaf_advance_calls;
+ void SetCallStats(CallStats call_stats) {
+ call_stats_ = std::move(call_stats);
}
std::string ToString() const override {
std::string ret = "<";
- for (auto& doc_hit_info : doc_hit_infos_) {
- absl_ports::StrAppend(&ret, IcingStringUtil::StringPrintf(
- "[%d,%d]", doc_hit_info.document_id(),
- doc_hit_info.hit_section_ids_mask()));
+ for (auto& doc_hit_info_pair : doc_hit_infos_) {
+ absl_ports::StrAppend(
+ &ret, IcingStringUtil::StringPrintf(
+ "[%d,%" PRIu64 "]",
+ doc_hit_info_pair.doc_hit_info().document_id(),
+ doc_hit_info_pair.doc_hit_info().hit_section_ids_mask()));
}
absl_ports::StrAppend(&ret, ">");
return ret;
}
private:
- int32_t index_ = 0;
- int32_t num_blocks_inspected_ = 0;
- int32_t num_leaf_advance_calls_ = 0;
- std::vector<DocHitInfo> doc_hit_infos_;
+ int32_t index_ = -1;
+ CallStats call_stats_;
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos_;
+ std::string term_;
+ int term_start_index_;
+ int unnormalized_term_length_;
};
inline std::vector<DocumentId> GetDocumentIds(DocHitInfoIterator* iterator) {
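
For orientation, a test typically seeds this dummy with canned hits and pulls them back through Advance(). A rough sketch against the interfaces above (DocHitInfo, DocumentId, and the dummy come from the surrounding icing headers; the commented argument names are illustrative):

    // Sketch only: assumes the icing test-util headers above are available.
    std::vector<DocHitInfo> infos = {DocHitInfo(/*document_id=*/2),
                                     DocHitInfo(/*document_id=*/1)};
    DocHitInfoIteratorDummy itr(infos, /*term=*/"foo");
    while (itr.Advance().ok()) {
      // Hits come back in the order supplied; real iterators return them in
      // decreasing document id order, so tests usually seed them that way.
      DocumentId id = itr.doc_hit_info().document_id();
      (void)id;  // A real test would assert on id here.
    }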
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index bcc2b6e..728f957 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -15,8 +15,14 @@
#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
+#include <array>
#include <cstdint>
+#include <functional>
+#include <memory>
#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -28,10 +34,29 @@
namespace icing {
namespace lib {
+// Data structure that maps a single matched query term to its section mask
+// and the list of term frequencies.
+// TODO(b/158603837): add stat on whether the matched terms are prefix matched
+// or not. This information will be used to boost exact match.
+struct TermMatchInfo {
+ std::string_view term;
+ // SectionIdMask associated to the term.
+ SectionIdMask section_ids_mask;
+  // Array with fixed size kTotalNumSections. For every section id, i.e.
+  // array index, it stores the term frequency of the term.
+ std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies;
+
+ explicit TermMatchInfo(
+ std::string_view term, SectionIdMask section_ids_mask,
+ std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies)
+ : term(term),
+ section_ids_mask(section_ids_mask),
+ term_frequencies(std::move(term_frequencies)) {}
+};
+
// Iterator over DocHitInfos (collapsed Hits) in REVERSE document_id order.
//
-// NOTE: You must call Advance() before calling hit_info() or
-// hit_intersect_section_ids_mask().
+// NOTE: You must call Advance() before calling hit_info().
//
// Example:
// DocHitInfoIterator itr = GetIterator(...);
@@ -40,10 +65,161 @@ namespace lib {
// }
class DocHitInfoIterator {
public:
+ using ChildrenMapper = std::function<std::unique_ptr<DocHitInfoIterator>(
+ std::unique_ptr<DocHitInfoIterator>)>;
+
+  // CallStats is a wrapper class for all stats collected across all levels
+  // of the DocHitInfoIterator tree. Internal nodes aggregate the numbers from
+  // all of their leaf nodes, while leaf nodes report their own actual
+  // numbers.
+ struct CallStats {
+ // The number of times Advance() was called on the leaf node for term lite
+ // index.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorTermLite should maintain and set it correctly.
+    //   - Others should set it to 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_lite_index;
+
+ // The number of times Advance() was called on the leaf node for term main
+ // index.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorTermMain should maintain and set it correctly.
+    //   - Others should set it to 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_main_index;
+
+ // The number of times Advance() was called on the leaf node for integer
+ // index.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorNumeric should maintain and set it correctly.
+    //   - Others should set it to 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_integer_index;
+
+ // The number of times Advance() was called on the leaf node without reading
+    // any hits from the index. Usually this field is specific to
+    // DocHitInfoIteratorAllDocumentId.
+ // - Leaf nodes:
+ // - DocHitInfoIteratorAllDocumentId should maintain and set it correctly.
+    //   - Others should set it to 0.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_leaf_advance_calls_no_index;
+
+ // The number of flash index blocks that have been read as a result of
+ // operations on this object.
+ // - Leaf nodes: should maintain and set it correctly for all child classes
+ // involving flash index block access.
+ // - Internal nodes: should aggregate values from all children.
+ int32_t num_blocks_inspected;
+
+ explicit CallStats()
+ : CallStats(/*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/0) {}
+
+ explicit CallStats(int32_t num_leaf_advance_calls_lite_index_in,
+ int32_t num_leaf_advance_calls_main_index_in,
+ int32_t num_leaf_advance_calls_integer_index_in,
+ int32_t num_leaf_advance_calls_no_index_in,
+ int32_t num_blocks_inspected_in)
+ : num_leaf_advance_calls_lite_index(
+ num_leaf_advance_calls_lite_index_in),
+ num_leaf_advance_calls_main_index(
+ num_leaf_advance_calls_main_index_in),
+ num_leaf_advance_calls_integer_index(
+ num_leaf_advance_calls_integer_index_in),
+ num_leaf_advance_calls_no_index(num_leaf_advance_calls_no_index_in),
+ num_blocks_inspected(num_blocks_inspected_in) {}
+
+ int32_t num_leaf_advance_calls() const {
+ return num_leaf_advance_calls_lite_index +
+ num_leaf_advance_calls_main_index +
+ num_leaf_advance_calls_integer_index +
+ num_leaf_advance_calls_no_index;
+ }
+
+ bool operator==(const CallStats& other) const {
+ return num_leaf_advance_calls_lite_index ==
+ other.num_leaf_advance_calls_lite_index &&
+ num_leaf_advance_calls_main_index ==
+ other.num_leaf_advance_calls_main_index &&
+ num_leaf_advance_calls_integer_index ==
+ other.num_leaf_advance_calls_integer_index &&
+ num_leaf_advance_calls_no_index ==
+ other.num_leaf_advance_calls_no_index &&
+ num_blocks_inspected == other.num_blocks_inspected;
+ }
+
+ CallStats operator+(const CallStats& other) const {
+ return CallStats(num_leaf_advance_calls_lite_index +
+ other.num_leaf_advance_calls_lite_index,
+ num_leaf_advance_calls_main_index +
+ other.num_leaf_advance_calls_main_index,
+ num_leaf_advance_calls_integer_index +
+ other.num_leaf_advance_calls_integer_index,
+ num_leaf_advance_calls_no_index +
+ other.num_leaf_advance_calls_no_index,
+ num_blocks_inspected + other.num_blocks_inspected);
+ }
+
+ CallStats& operator+=(const CallStats& other) {
+ *this = *this + other;
+ return *this;
+ }
+ };
+
+ struct TrimmedNode {
+    // Iterator over the query results; suggestions should only be searched
+    // for within these documents.
+    std::unique_ptr<DocHitInfoIterator> iterator_;
+    // Term of the trimmed node, from which suggested strings are generated.
+    std::string term_;
+    // The string in the query that indicates the target section in which to
+    // search for suggestions.
+    std::string target_section_;
+    // The start index of the current term in the given search query.
+    int term_start_index_;
+    // The length of the given unnormalized term in the search query.
+    int unnormalized_term_length_;
+
+ TrimmedNode(std::unique_ptr<DocHitInfoIterator> iterator, std::string term,
+ int term_start_index, int unnormalized_term_length)
+ : iterator_(std::move(iterator)),
+ term_(term),
+ target_section_(""),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length) {}
+ };
+
+  // Trims the right-most node of the iterator tree.
+  // This is to support search suggestions for the last term, which is the
+  // right-most node of the root iterator tree. Trimming the right-most node
+  // is only supported on the AND, AND_NARY, OR, OR_NARY, OR_LEAF, Filter, and
+  // property-in-schema-check iterators.
+  //
+  // After calling this method, this iterator is no longer usable. Please use
+  // the returned iterator.
+  // Returns:
+  //   the new iterator without the right-most child, if it was able to trim
+  //   the right-most node.
+  //   nullptr if the current iterator itself should be trimmed.
+  //   INVALID_ARGUMENT if the right-most node is not supposed to be trimmed.
+ virtual libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && = 0;
+
+ // Map all direct children of this iterator according to the passed mapper.
+ virtual void MapChildren(const ChildrenMapper& mapper) = 0;
+
+ virtual bool is_leaf() { return false; }
+
virtual ~DocHitInfoIterator() = default;
// Returns:
// OK if was able to advance to a new document_id.
+  //   INVALID_ARGUMENT if there are fewer than 2 iterators for an AND/OR
+  //   iterator
   //   RESOURCE_EXHAUSTED if we've run out of document_ids to iterate over
virtual libtextclassifier3::Status Advance() = 0;
@@ -52,27 +228,25 @@ class DocHitInfoIterator {
// construction or if Advance returned an error.
const DocHitInfo& doc_hit_info() const { return doc_hit_info_; }
- // SectionIdMask representing which sections (if any) have matched *ALL* query
- // terms for the current document_id.
- SectionIdMask hit_intersect_section_ids_mask() const {
- return hit_intersect_section_ids_mask_;
- }
-
- // Gets the number of flash index blocks that have been read as a
- // result of operations on this object.
- virtual int32_t GetNumBlocksInspected() const = 0;
-
- // HitIterators may be constructed into trees. Internal nodes will return the
- // sum of the number of Advance() calls to all leaf nodes. Leaf nodes will
- // return the number of times Advance() was called on it.
- virtual int32_t GetNumLeafAdvanceCalls() const = 0;
+ // Returns CallStats of the DocHitInfoIterator tree.
+ virtual CallStats GetCallStats() const = 0;
// A string representing the iterator.
virtual std::string ToString() const = 0;
+  // For the last hit docid, retrieves all the matched query terms and other
+  // stats; see TermMatchInfo.
+  // filtering_section_mask filters the matching sections and should be set
+  // only by DocHitInfoIteratorSectionRestrict.
+  // If Advance() wasn't called after construction, if Advance() returned an
+  // error, or if the concrete HitIterator didn't override this method, the
+  // vector isn't populated.
+ virtual void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const {}
+
protected:
DocHitInfo doc_hit_info_;
- SectionIdMask hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
// Helper function to advance the given iterator to at most the given
// document_id.
@@ -87,11 +261,20 @@ class DocHitInfoIterator {
// Didn't find anything for the other iterator, reset to invalid values and
// return.
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
-}; // namespace DocHitInfoIterator
+};
+
+// A leaf node is a term node or a chain of section-restriction nodes applied
+// to a term node.
+class DocHitInfoLeafIterator : public DocHitInfoIterator {
+ public:
+ bool is_leaf() override { return true; }
+
+  // Calling MapChildren on a leaf node does not make sense and will do
+  // nothing.
+ void MapChildren(const ChildrenMapper& mapper) override {}
+};
} // namespace lib
} // namespace icing
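
To make the leaf/internal aggregation contract of GetCallStats() concrete, here is a small standalone sketch (a cut-down two-field stats struct and a generic tree node, not the real CallStats or iterator classes):

    #include <cstdint>
    #include <memory>
    #include <vector>

    struct Stats {
      int32_t num_leaf_advance_calls = 0;
      int32_t num_blocks_inspected = 0;
      Stats operator+(const Stats& other) const {
        return {num_leaf_advance_calls + other.num_leaf_advance_calls,
                num_blocks_inspected + other.num_blocks_inspected};
      }
    };

    struct Node {
      Stats own;  // Leaf nodes record real numbers; internal nodes leave 0.
      std::vector<std::unique_ptr<Node>> children;

      Stats GetStats() const {
        Stats total = own;
        for (const std::unique_ptr<Node>& child : children) {
          total = total + child->GetStats();  // Sum over the whole subtree.
        }
        return total;
      }
    };

Defining operator+ on the stats struct, as CallStats does, is what lets internal nodes aggregate without caring which individual counters exist.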
diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
index 90e4888..993c3b8 100644
--- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
+++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
@@ -14,15 +14,15 @@
#include <vector>
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
-#include "testing/base/public/benchmark.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
namespace icing {
namespace lib {
@@ -35,7 +35,7 @@ namespace {
//
// $
// blaze-bin/icing/index/iterator/doc-hit-info-iterator_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// $ blaze build --config=android_arm64 -c opt --dynamic_mode=off
@@ -47,7 +47,7 @@ namespace {
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/doc-hit-info-iterator_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
// Functor to be used with std::generate to create a container of DocHitInfos.
// DocHitInfos are generated starting at docid starting_docid and continuing at
diff --git a/icing/index/iterator/section-restrict-data.cc b/icing/index/iterator/section-restrict-data.cc
new file mode 100644
index 0000000..085437d
--- /dev/null
+++ b/icing/index/iterator/section-restrict-data.cc
@@ -0,0 +1,82 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/section-restrict-data.h"
+
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+SectionIdMask SectionRestrictData::GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const {
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list = schema_store_.GetSectionMetadata(schema_type);
+ if (!section_metadata_list.ok()) {
+ // The current schema doesn't have section metadata.
+ return kSectionIdMaskNone;
+ }
+ for (const SectionMetadata& section_metadata :
+ *section_metadata_list.ValueOrDie()) {
+ if (target_sections.find(section_metadata.path) != target_sections.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ return section_mask;
+}
+
+SectionIdMask SectionRestrictData::ComputeAllowedSectionsMask(
+ const std::string& schema_type) {
+ if (const auto type_property_mask_itr =
+ type_property_masks_.find(schema_type);
+ type_property_mask_itr != type_property_masks_.end()) {
+ return type_property_mask_itr->second;
+ }
+
+  // The section id mask of schema_type has not been calculated before, so
+  // calculate it here and cache it in type_property_masks_.
+ // - If type property filters of schema_type or wildcard (*) are
+ // specified, then create a mask according to the filters.
+ // - Otherwise, create a mask to match all properties.
+ SectionIdMask new_section_id_mask = kSectionIdMaskAll;
+ if (const auto itr = type_property_filters_.find(schema_type);
+ itr != type_property_filters_.end()) {
+ // Property filters defined for given schema type
+ new_section_id_mask = GenerateSectionMask(schema_type, itr->second);
+ } else if (const auto wildcard_itr = type_property_filters_.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_itr != type_property_filters_.end()) {
+ // Property filters defined for wildcard entry
+ new_section_id_mask =
+ GenerateSectionMask(schema_type, wildcard_itr->second);
+ } else {
+ // Do not cache the section mask if no property filters apply to this schema
+ // type to avoid taking up unnecessary space.
+ return kSectionIdMaskAll;
+ }
+
+ type_property_masks_[schema_type] = new_section_id_mask;
+ return new_section_id_mask;
+}
+
+} // namespace lib
+} // namespace icing
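
The mask built by GenerateSectionMask() is a bitwise OR over the ids of sections whose paths appear in the filter. A standalone sketch with simplified stand-in types (SimpleSectionMetadata is illustrative, not the icing struct):

    #include <cstdint>
    #include <set>
    #include <string>
    #include <vector>

    using Mask = uint64_t;

    struct SimpleSectionMetadata {
      std::string path;
      int id;  // Bit position in the mask.
    };

    Mask GenerateMask(const std::vector<SimpleSectionMetadata>& sections,
                      const std::set<std::string>& targets) {
      Mask mask = 0;
      for (const SimpleSectionMetadata& section : sections) {
        if (targets.count(section.path) > 0) {
          mask |= UINT64_C(1) << section.id;  // Set this section's bit.
        }
      }
      return mask;
    }

    // Example: sections {"subject", id 0} and {"body", id 3} with targets
    // {"body"} yield mask 0b1000.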
diff --git a/icing/index/iterator/section-restrict-data.h b/icing/index/iterator/section-restrict-data.h
new file mode 100644
index 0000000..26ca597
--- /dev/null
+++ b/icing/index/iterator/section-restrict-data.h
@@ -0,0 +1,98 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
+#define ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
+
+#include <cstdint>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+class SectionRestrictData {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed.
+ SectionRestrictData(const DocumentStore* document_store,
+ const SchemaStore* schema_store, int64_t current_time_ms,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters)
+ : document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms),
+ type_property_filters_(std::move(type_property_filters)) {}
+
+  // Calculates the section mask of allowed sections (determined by the
+  // property filters map) for the given schema type and caches the result
+  // for any future calls.
+ //
+ // Returns:
+ // - If type_property_filters_ has an entry for the given schema type or
+  //   - If type_property_filters_ has an entry for the given schema type or
+  //     the wildcard (*), return a bitwise OR of the section IDs in the
+  //     schema type that are also present in the relevant filter list.
+ SectionIdMask ComputeAllowedSectionsMask(const std::string& schema_type);
+
+ const DocumentStore& document_store() const { return document_store_; }
+
+ const SchemaStore& schema_store() const { return schema_store_; }
+
+ int64_t current_time_ms() const { return current_time_ms_; }
+
+ const std::unordered_map<std::string, std::set<std::string>>&
+ type_property_filters() const {
+ return type_property_filters_;
+ }
+
+ private:
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+ int64_t current_time_ms_;
+
+  // Map of property filters per schema type. Supports a wildcard (*) entry
+  // that applies to all schema types not otherwise specifically listed in
+  // the mapping.
+ std::unordered_map<std::string, std::set<std::string>> type_property_filters_;
+ // Mapping of schema type to the section mask of allowed sections for that
+ // schema type. This section mask is lazily calculated based on the
+ // specified property filters and cached for any future use.
+ std::unordered_map<std::string, SectionIdMask> type_property_masks_;
+
+ // Generates a section mask for the given schema type and the target
+ // sections.
+ //
+ // Returns:
+  //   - A bitwise OR of the section IDs in the schema_type that are also
+  //     present in the target_sections list.
+  //   - kSectionIdMaskNone if none of the sections in the schema_type are
+  //     present in the target_sections list.
+ SectionIdMask GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
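
As a usage sketch, a caller builds the filter map once per query and then relies on the lazily cached masks (document_store, schema_store, and current_time_ms are assumed to be supplied by the caller's environment; the schema type and property names are made up):

    // Restrict "Email" documents to their "subject" property; schema types
    // without a filter entry keep kSectionIdMaskAll.
    std::unordered_map<std::string, std::set<std::string>> filters = {
        {"Email", {"subject"}}};
    SectionRestrictData data(document_store, schema_store, current_time_ms,
                             std::move(filters));
    // The first call computes and caches the mask; repeats are map lookups.
    SectionIdMask email_mask = data.ComputeAllowedSectionsMask("Email");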
diff --git a/icing/index/lite-index.cc b/icing/index/lite-index.cc
deleted file mode 100644
index 489c53d..0000000
--- a/icing/index/lite-index.cc
+++ /dev/null
@@ -1,457 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/lite-index.h"
-
-#include <inttypes.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <sys/mman.h>
-
-#include <algorithm>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/file/filesystem.h"
-#include "icing/index/hit/doc-hit-info.h"
-#include "icing/index/hit/hit.h"
-#include "icing/index/term-property-id.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/legacy/core/icing-timer.h"
-#include "icing/legacy/index/icing-array-storage.h"
-#include "icing/legacy/index/icing-dynamic-trie.h"
-#include "icing/legacy/index/icing-filesystem.h"
-#include "icing/legacy/index/icing-lite-index-header.h"
-#include "icing/legacy/index/icing-mmapper.h"
-#include "icing/proto/term.pb.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
-#include "icing/util/crc32.h"
-#include "icing/util/logging.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-
-// Point at which we declare the trie full.
-constexpr double kTrieFullFraction = 0.95;
-
-std::string MakeHitBufferFilename(const std::string& filename_base) {
- return filename_base + "hb";
-}
-
-size_t header_size() { return sizeof(IcingLiteIndex_HeaderImpl::HeaderData); }
-
-} // namespace
-
-const LiteIndex::Element::Value LiteIndex::Element::kInvalidValue =
- LiteIndex::Element(0, Hit()).value();
-
-libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> LiteIndex::Create(
- const LiteIndex::Options& options, const IcingFilesystem* filesystem) {
- ICING_RETURN_ERROR_IF_NULL(filesystem);
-
- std::unique_ptr<LiteIndex> lite_index =
- std::unique_ptr<LiteIndex>(new LiteIndex(options, filesystem));
- ICING_RETURN_IF_ERROR(lite_index->Initialize());
- return std::move(lite_index);
-}
-
-// size is max size in elements. An appropriate lexicon and display
-// mapping size will be chosen based on hit buffer size.
-LiteIndex::LiteIndex(const LiteIndex::Options& options,
- const IcingFilesystem* filesystem)
- : hit_buffer_(*filesystem),
- hit_buffer_crc_(0),
- lexicon_(options.filename_base + "lexicon", MakeTrieRuntimeOptions(),
- filesystem),
- header_mmap_(false, MAP_SHARED),
- options_(options),
- filesystem_(filesystem) {}
-
-LiteIndex::~LiteIndex() {
- if (initialized()) {
- libtextclassifier3::Status unused = PersistToDisk();
- }
-}
-
-IcingDynamicTrie::RuntimeOptions LiteIndex::MakeTrieRuntimeOptions() {
- return IcingDynamicTrie::RuntimeOptions().set_storage_policy(
- IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc);
-}
-
-libtextclassifier3::Status LiteIndex::Initialize() {
- // Size of hit buffer's header struct, rounded up to the nearest number of
- // system memory pages.
- const size_t header_padded_size =
- IcingMMapper::page_aligned_size(header_size());
-
- // Variable declarations cannot cross goto jumps, so declare these up top.
- libtextclassifier3::Status status;
- uint64_t file_size;
- IcingTimer timer;
-
- if (!lexicon_.CreateIfNotExist(options_.lexicon_options) ||
- !lexicon_.Init()) {
- return absl_ports::InternalError("Failed to initialize lexicon trie");
- }
-
- hit_buffer_fd_.reset(filesystem_->OpenForWrite(
- MakeHitBufferFilename(options_.filename_base).c_str()));
- if (!hit_buffer_fd_.is_valid()) {
- status = absl_ports::InternalError("Failed to open hit buffer file");
- goto error;
- }
-
- file_size = filesystem_->GetFileSize(hit_buffer_fd_.get());
- if (file_size == IcingFilesystem::kBadFileSize) {
- status = absl_ports::InternalError("Failed to query hit buffer file size");
- goto error;
- }
-
- if (file_size < header_padded_size) {
- if (file_size != 0) {
- status = absl_ports::InternalError(IcingStringUtil::StringPrintf(
- "Hit buffer had unexpected size %" PRIu64, file_size));
- goto error;
- }
-
- ICING_VLOG(2) << "Creating new hit buffer";
- // Make sure files are fresh.
- if (!lexicon_.Remove() ||
- !lexicon_.CreateIfNotExist(options_.lexicon_options) ||
- !lexicon_.Init()) {
- status =
- absl_ports::InternalError("Failed to refresh lexicon during clear");
- goto error;
- }
-
- // Create fresh hit buffer by first emptying the hit buffer file and then
- // allocating header_padded_size of the cleared space.
- if (!filesystem_->Truncate(hit_buffer_fd_.get(), 0) ||
- !filesystem_->Truncate(hit_buffer_fd_.get(), header_padded_size)) {
- status = absl_ports::InternalError("Failed to truncate hit buffer file");
- goto error;
- }
-
- // Set up header.
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
- header_ = std::make_unique<IcingLiteIndex_HeaderImpl>(
- reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()));
- header_->Reset();
-
- if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
- sizeof(Element::Value), header_->cur_size(),
- options_.hit_buffer_size, &hit_buffer_crc_, true)) {
- status = absl_ports::InternalError("Failed to initialize new hit buffer");
- goto error;
- }
-
- UpdateChecksum();
- } else {
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
- header_ = std::make_unique<IcingLiteIndex_HeaderImpl>(
- reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()));
-
- if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
- sizeof(Element::Value), header_->cur_size(),
- options_.hit_buffer_size, &hit_buffer_crc_, true)) {
- status = absl_ports::InternalError(
- "Failed to re-initialize existing hit buffer");
- goto error;
- }
-
- // Check integrity.
- if (!header_->check_magic()) {
- status = absl_ports::InternalError("Lite index header magic mismatch");
- goto error;
- }
- Crc32 crc = ComputeChecksum();
- if (crc.Get() != header_->lite_index_crc()) {
- status = absl_ports::DataLossError(
- IcingStringUtil::StringPrintf("Lite index crc check failed: %u vs %u",
- crc.Get(), header_->lite_index_crc()));
- goto error;
- }
- }
-
- ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index init ok in %.3fms",
- timer.Elapsed() * 1000);
- return status;
-
-error:
- header_ = nullptr;
- header_mmap_.Unmap();
- lexicon_.Close();
- hit_buffer_crc_ = 0;
- hit_buffer_.Reset();
- hit_buffer_fd_.reset();
- if (status.ok()) {
- return absl_ports::InternalError(
- "Error handling code ran but status was ok");
- }
- return status;
-}
-
-Crc32 LiteIndex::ComputeChecksum() {
- IcingTimer timer;
-
- // Update crcs.
- uint32_t dependent_crcs[2];
- hit_buffer_.UpdateCrc();
- dependent_crcs[0] = hit_buffer_crc_;
- dependent_crcs[1] = lexicon_.UpdateCrc();
-
- // Compute the master crc.
-
- // Header crc, excluding the actual crc field.
- Crc32 all_crc(header_->CalculateHeaderCrc());
- all_crc.Append(std::string_view(reinterpret_cast<const char*>(dependent_crcs),
- sizeof(dependent_crcs)));
- ICING_VLOG(2) << IcingStringUtil::StringPrintf(
- "Lite index crc computed in %.3fms", timer.Elapsed() * 1000);
-
- return all_crc;
-}
-
-libtextclassifier3::Status LiteIndex::Reset() {
- IcingTimer timer;
-
- // TODO(b/140436942): When these components have been changed to return errors
- // they should be propagated from here.
- lexicon_.Clear();
- hit_buffer_.Clear();
- header_->Reset();
- UpdateChecksum();
-
- ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index clear in %.3fms",
- timer.Elapsed() * 1000);
- return libtextclassifier3::Status::OK;
-}
-
-void LiteIndex::Warm() {
- hit_buffer_.Warm();
- lexicon_.Warm();
-}
-
-libtextclassifier3::Status LiteIndex::PersistToDisk() {
- bool success = true;
- if (!lexicon_.Sync()) {
- ICING_VLOG(1) << "Failed to sync the lexicon.";
- success = false;
- }
- hit_buffer_.Sync();
- UpdateChecksum();
- header_mmap_.Sync();
-
- return (success) ? libtextclassifier3::Status::OK
- : absl_ports::InternalError(
- "Unable to sync lite index components.");
-}
-
-void LiteIndex::UpdateChecksum() {
- header_->set_lite_index_crc(ComputeChecksum().Get());
-}
-
-libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
- const std::string& term, TermMatchType::Code term_match_type,
- NamespaceId namespace_id) {
- uint32_t tvi;
- if (!lexicon_.Insert(term.c_str(), "", &tvi, false)) {
- return absl_ports::ResourceExhaustedError(
- absl_ports::StrCat("Unable to add term ", term, " to lexicon!"));
- }
- ICING_RETURN_IF_ERROR(UpdateTermProperties(
- tvi, term_match_type == TermMatchType::PREFIX, namespace_id));
- return tvi;
-}
-
-libtextclassifier3::Status LiteIndex::UpdateTermProperties(
- uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
- if (hasPrefixHits &&
- !lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) {
- return absl_ports::ResourceExhaustedError(
- "Insufficient disk space to create prefix property!");
- }
-
- if (!lexicon_.SetProperty(tvi, GetNamespacePropertyId(namespace_id))) {
- return absl_ports::ResourceExhaustedError(
- "Insufficient disk space to create namespace property!");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
- if (is_full()) {
- return absl_ports::ResourceExhaustedError("Hit buffer is full!");
- }
-
- header_->set_last_added_docid(hit.document_id());
-
- Element elt(term_id, hit);
- uint32_t cur_size = header_->cur_size();
- Element::Value* valp = hit_buffer_.GetMutableMem<Element::Value>(cur_size, 1);
- if (valp == nullptr) {
- return absl_ports::ResourceExhaustedError(
- "Allocating more space in hit buffer failed!");
- }
- *valp = elt.value();
- header_->set_cur_size(cur_size + 1);
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::StatusOr<uint32_t> LiteIndex::FindTerm(
- const std::string& term) const {
- char dummy;
- uint32_t tvi;
- if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Could not find ", term, " in the lexicon."));
- }
- return tvi;
-}
-
-uint32_t LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
- bool only_from_prefix_sections,
- std::vector<DocHitInfo>* hits_out) {
- uint32_t count = 0;
- DocumentId last_document_id = kInvalidDocumentId;
- for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) {
- Element elt(hit_buffer_.array_cast<Element>()[idx]);
- if (elt.term_id() != term_id) break;
-
- const Hit& hit = elt.hit();
- // Check sections.
- if (((1u << hit.section_id()) & section_id_mask) == 0) {
- continue;
- }
- // Check prefix section only.
- if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
- continue;
- }
- DocumentId document_id = hit.document_id();
- if (document_id != last_document_id) {
- count++;
- if (hits_out != nullptr) {
- hits_out->push_back(DocHitInfo(document_id));
- }
- last_document_id = document_id;
- }
- if (hits_out != nullptr) {
- hits_out->back().UpdateSection(hit.section_id(), hit.score());
- }
- }
- return count;
-}
-
-uint32_t LiteIndex::CountHits(uint32_t term_id) {
- return AppendHits(term_id, kSectionIdMaskAll,
- /*only_from_prefix_sections=*/false,
- /*hits_out=*/nullptr);
-}
-
-bool LiteIndex::is_full() const {
- return (header_->cur_size() == options_.hit_buffer_size ||
- lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction));
-}
-
-void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const {
- absl_ports::StrAppend(
- out, IcingStringUtil::StringPrintf("Lite Index\nHit buffer %u/%u\n",
- header_->cur_size(),
- options_.hit_buffer_size));
-
- // Lexicon.
- out->append("Lexicon stats:\n");
- lexicon_.GetDebugInfo(verbosity, out);
-}
-
-libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
- int64_t header_and_hit_buffer_file_size =
- filesystem_->GetFileSize(hit_buffer_fd_.get());
-
- if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of the LiteIndex's header and hit buffer");
- }
-
- int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
- if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
- }
-
- // On initialization, we grow the file to a padded size first. So this size
- // won't count towards the size taken up by elements
- size_t header_padded_size = IcingMMapper::page_aligned_size(header_size());
-
- return header_and_hit_buffer_file_size - header_padded_size +
- lexicon_disk_usage;
-}
-
-uint32_t LiteIndex::Seek(uint32_t term_id) {
- // Make searchable by sorting by hit buffer.
- uint32_t sort_len = header_->cur_size() - header_->searchable_end();
- if (sort_len > 0) {
- IcingTimer timer;
-
- auto* array_start =
- hit_buffer_.GetMutableMem<Element::Value>(0, header_->cur_size());
- Element::Value* sort_start = array_start + header_->searchable_end();
- std::sort(sort_start, array_start + header_->cur_size());
-
- // Now merge with previous region. Since the previous region is already
- // sorted and deduplicated, optimize the merge by skipping everything less
- // than the new region's smallest value.
- if (header_->searchable_end() > 0) {
- std::inplace_merge(array_start, array_start + header_->searchable_end(),
- array_start + header_->cur_size());
- }
- ICING_VLOG(2) << IcingStringUtil::StringPrintf(
- "Lite index sort and merge %u into %u in %.3fms", sort_len,
- header_->searchable_end(), timer.Elapsed() * 1000);
-
- // Now the entire array is sorted.
- header_->set_searchable_end(header_->cur_size());
-
- // Update crc in-line.
- UpdateChecksum();
- }
-
- // Binary search for our term_id. Make sure we get the first
- // element. Using kBeginSortValue ensures this for the hit value.
- Element elt(term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kMaxHitScore));
-
- const Element::Value* array = hit_buffer_.array_cast<Element::Value>();
- const Element::Value* ptr =
- std::lower_bound(array, array + header_->cur_size(), elt.value());
- return ptr - array;
-}
-
-} // namespace lib
-} // namespace icing
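
The Seek() routine deleted above amortizes sorting: the buffer keeps an already-sorted prefix [0, searchable_end) and only sorts the newly appended tail before merging it in. A standalone sketch of that pattern over a plain vector of packed values (not the mmapped hit buffer, and without the checksum bookkeeping):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Sorts the unsorted tail and merges it into the sorted prefix, then
    // extends the searchable region to cover the whole buffer.
    void MakeSearchable(std::vector<uint64_t>& buffer, size_t& searchable_end) {
      if (searchable_end == buffer.size()) return;  // Already fully sorted.
      std::sort(buffer.begin() + searchable_end, buffer.end());
      if (searchable_end > 0) {
        std::inplace_merge(buffer.begin(), buffer.begin() + searchable_end,
                           buffer.end());
      }
      searchable_end = buffer.size();
    }

    // Returns the index of the first element >= value, as LiteIndex::Seek
    // does with its lower_bound over packed Element values.
    size_t Seek(std::vector<uint64_t>& buffer, size_t& searchable_end,
                uint64_t value) {
      MakeSearchable(buffer, searchable_end);
      return std::lower_bound(buffer.begin(), buffer.end(), value) -
             buffer.begin();
    }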
diff --git a/icing/index/lite-index.h b/icing/index/lite-index.h
deleted file mode 100644
index b60a947..0000000
--- a/icing/index/lite-index.h
+++ /dev/null
@@ -1,269 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// A small index with continuous updates (doesn't need explicit Flush
-// to persist) but has a higher possibility of corruption. It can always
-// detect corruption reliably.
-
-#ifndef ICING_INDEX_LITE_INDEX_H_
-#define ICING_INDEX_LITE_INDEX_H_
-
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/file/filesystem.h"
-#include "icing/index/hit/doc-hit-info.h"
-#include "icing/index/hit/hit.h"
-#include "icing/legacy/index/icing-array-storage.h"
-#include "icing/legacy/index/icing-dynamic-trie.h"
-#include "icing/legacy/index/icing-filesystem.h"
-#include "icing/legacy/index/icing-lite-index-header.h"
-#include "icing/legacy/index/icing-lite-index-options.h"
-#include "icing/legacy/index/icing-mmapper.h"
-#include "icing/proto/term.pb.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
-#include "icing/store/namespace-id.h"
-#include "icing/util/bit-util.h"
-#include "icing/util/crc32.h"
-
-namespace icing {
-namespace lib {
-
-class LiteIndex {
- public:
- // An entry in the hit buffer.
- class Element {
- public:
- // Layout bits: 24 termid + 32 hit value + 8 hit score.
- using Value = uint64_t;
-
- static constexpr int kTermIdBits = 24;
- static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
- static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8;
-
- static const Value kInvalidValue;
-
- explicit Element(Value v = kInvalidValue) : value_(v) {}
-
- Element(uint32_t term_id, const Hit& hit) {
- static_assert(
- kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8,
- "LiteIndexElementTooBig");
-
- value_ = 0;
- // Term id goes into the most significant bits because it takes
-      // precedence in sorts.
- bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits,
- &value_);
- bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_);
- bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_);
- }
-
- uint32_t term_id() const {
- return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits,
- kTermIdBits);
- }
-
- Hit hit() const {
- return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits),
- bit_util::BitfieldGet(value_, 0, kHitScoreBits));
- }
-
- Value value() const { return value_; }
-
- private:
- Value value_;
- };
-
- using Options = IcingLiteIndexOptions;
-
- // Updates checksum of subcomponents.
- ~LiteIndex();
-
- // Creates lite index from storage. The files will be created if they do not
- // already exist.
- //
- // Returns:
- // OK on success
- // DATA_LOSS if the index was corrupted and cleared
- // INTERNAL on I/O error
- static libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> Create(
- const Options& options, const IcingFilesystem* filesystem);
-
- // Resets all internal members of the index. Returns OK if all operations were
- // successful.
- libtextclassifier3::Status Reset();
-
- // Advises the OS to cache pages in the index, which will be accessed for a
- // query soon.
- void Warm();
-
- // Syncs all modified files in the index to disk.
- //
- // Returns:
- // OK on success
- // INTERNAL on I/O error
- libtextclassifier3::Status PersistToDisk();
-
- // Calculate the checksum of all sub-components of the LiteIndex
- Crc32 ComputeChecksum();
-
- // Returns term_id if term found, NOT_FOUND otherwise.
- libtextclassifier3::StatusOr<uint32_t> FindTerm(
- const std::string& term) const;
-
- // Returns an iterator for all terms for which 'prefix' is a prefix.
- class PrefixIterator {
- public:
- explicit PrefixIterator(const IcingDynamicTrie::Iterator& delegate)
- : delegate_(delegate) {}
- bool IsValid() const { return delegate_.IsValid(); }
-
- void Advance() { delegate_.Advance(); }
-
- const char* GetKey() const { return delegate_.GetKey(); }
-
- uint32_t GetValueIndex() const { return delegate_.GetValueIndex(); }
-
- private:
- IcingDynamicTrie::Iterator delegate_;
- };
-
- PrefixIterator FindTermPrefixes(const std::string& prefix) const {
- return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str()));
- }
-
- // Inserts a term with its properties.
- //
- // Returns:
- // A value index on success
- // RESOURCE_EXHAUSTED if lexicon is full or no disk space is available
- libtextclassifier3::StatusOr<uint32_t> InsertTerm(
- const std::string& term, TermMatchType::Code term_match_type,
- NamespaceId namespace_id);
-
- // Updates term properties by setting hasPrefixHits and namespace id of the
- // term.
- //
- // Returns:
- // OK on success
- // RESOURCE_EXHAUSTED if no disk space is available
- libtextclassifier3::Status UpdateTermProperties(uint32_t tvi,
- bool hasPrefixHits,
- NamespaceId namespace_id);
-
- // Append hit to buffer. term_id must be encoded using the same term_id_codec
- // supplied to the index constructor. Returns non-OK if hit cannot be added
-  // (because either the hit buffer or file system capacity was reached).
- libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit);
-
- // Add all hits with term_id from the sections specified in section_id_mask,
- // skipping hits in non-prefix sections if only_from_prefix_sections is true,
- // to hits_out.
- uint32_t AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
- bool only_from_prefix_sections,
- std::vector<DocHitInfo>* hits_out);
-
- // Returns the hit count of the term.
- uint32_t CountHits(uint32_t term_id);
-
- // Check if buffer has reached its capacity.
- bool is_full() const;
-
- constexpr static uint32_t max_hit_buffer_size() {
- return std::numeric_limits<uint32_t>::max() / sizeof(LiteIndex::Element);
- }
-
- // We keep track of the last added document_id. This is always the largest
- // document_id that has been added because hits can only be added in order of
- // increasing document_id.
- DocumentId last_added_document_id() const {
- return header_->last_added_docid();
- }
-
- const IcingDynamicTrie& lexicon() const { return lexicon_; }
-
- // Returns debug information for the index in out.
- // verbosity <= 0, simplest debug information - size of lexicon, hit buffer
- // verbosity > 0, more detailed debug information from the lexicon.
- void GetDebugInfo(int verbosity, std::string* out) const;
-
- // Returns the byte size of all the elements held in the index. This excludes
- // the size of any internal metadata of the index, e.g. the index's header.
- //
- // Returns:
- // Byte size on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
-
- private:
- static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
-
- LiteIndex(const Options& options, const IcingFilesystem* filesystem);
-
- // Initializes lite index from storage. Must be called exactly once after
- // object construction.
- //
- // Returns:
- // OK on success
- // DATA_LOSS if the index was corrupted and cleared
- // INTERNAL on I/O error
- libtextclassifier3::Status Initialize();
-
- bool initialized() const { return header_ != nullptr; }
-
- // Sets the computed checksum in the header
- void UpdateChecksum();
-
- // Returns the position of the first element with term_id, or the size of the
- // hit buffer if term_id is not present.
- uint32_t Seek(uint32_t term_id);
-
- // File descriptor that points to where the header and hit buffer are written
- // to.
- ScopedFd hit_buffer_fd_;
-
- // Mmapped region past the header that stores the hits.
- IcingArrayStorage hit_buffer_;
-
- // Crc checksum of the hits, excludes the header.
- uint32_t hit_buffer_crc_;
-
- // Trie that maps indexed terms to their term id
- IcingDynamicTrie lexicon_;
-
- // TODO(b/140437260): Port over to MemoryMappedFile
- // Memory mapped region of the underlying file that reflects the header.
- IcingMMapper header_mmap_;
-
- // Wrapper around the mmapped header that contains stats on the lite index.
- std::unique_ptr<IcingLiteIndex_Header> header_;
-
- // Options used to initialize the LiteIndex.
- const Options options_;
-
- // TODO(b/139087650) Move to icing::Filesystem
- const IcingFilesystem* const filesystem_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_INDEX_LITE_INDEX_H_
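
The deleted Element layout (24-bit term id, 32-bit hit value, 8-bit score packed into one uint64_t, with the term id in the most significant bits so packed values sort by term id first) can be illustrated with plain shifts; the original code routes the same arithmetic through bit_util:

    #include <cstdint>

    constexpr int kScoreBits = 8;      // Mirrors kHitScoreBits above.
    constexpr int kHitValueBits = 32;  // Mirrors kHitValueBits above.

    // Packs [24-bit term id][32-bit hit value][8-bit score] into one value.
    // term_id must fit in 24 bits; any higher bits are shifted out and lost.
    uint64_t PackElement(uint32_t term_id, uint32_t hit_value, uint8_t score) {
      return (static_cast<uint64_t>(term_id) << (kHitValueBits + kScoreBits)) |
             (static_cast<uint64_t>(hit_value) << kScoreBits) |
             static_cast<uint64_t>(score);
    }

    uint32_t UnpackTermId(uint64_t value) {
      return static_cast<uint32_t>(value >> (kHitValueBits + kScoreBits));
    }

    uint8_t UnpackScore(uint64_t value) {
      return static_cast<uint8_t>(value & 0xFF);
    }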
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
new file mode 100644
index 0000000..21eecb6
--- /dev/null
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -0,0 +1,217 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <numeric>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/schema/section.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
+ std::string mask(kTotalNumSections, '0');
+ for (SectionId i = kMaxSectionId; i >= 0; --i) {
+ if (section_id_mask & (UINT64_C(1) << i)) {
+ mask[kMaxSectionId - i] = '1';
+ }
+ }
+ return mask;
+}
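
The helper above prints the mask most-significant section first: bit i lands at string position kMaxSectionId - i. Below is a minimal standalone sketch of that layout, assuming 16 sections with kMaxSectionId = 15 (the real constants live in schema/section.h):

#include <cstdint>
#include <iostream>
#include <string>

int main() {
  constexpr int kMaxSectionId = 15;
  constexpr int kTotalNumSections = 16;
  // Sections 1 and 15 have hits.
  uint64_t section_id_mask = (UINT64_C(1) << 1) | (UINT64_C(1) << 15);
  std::string mask(kTotalNumSections, '0');
  for (int i = kMaxSectionId; i >= 0; --i) {
    if (section_id_mask & (UINT64_C(1) << i)) {
      mask[kMaxSectionId - i] = '1';  // Highest section id is printed first.
    }
  }
  std::cout << mask << "\n";  // Prints "1000000000000010".
  return 0;
}
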
+
+} // namespace
+
+libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
+ if (cached_hits_idx_ == -1) {
+ libtextclassifier3::Status status = RetrieveMoreHits();
+ if (!status.ok()) {
+ if (!absl_ports::IsNotFound(status)) {
+        // NOT_FOUND is expected to happen (not every term will be in the
+        // lite index!). Other errors are worth logging.
+ ICING_LOG(ERROR)
+ << "Encountered unexpected failure while retrieving hits "
+ << status.error_message();
+ }
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ } else {
+ ++cached_hits_idx_;
+ }
+ if (cached_hits_idx_ == -1 || cached_hits_idx_ >= cached_hits_.size()) {
+ // Nothing more for the iterator to return. Set these members to invalid
+ // values.
+ doc_hit_info_ = DocHitInfo();
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ ++num_advance_calls_;
+ doc_hit_info_ = cached_hits_.at(cached_hits_idx_);
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorTermLite::TrimRightMostNode() && {
+ // Leaf iterator should trim itself.
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+}
+
+libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
+ // Exact match only. All hits in lite lexicon are exact.
+ ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->GetTermId(term_));
+ ICING_ASSIGN_OR_RETURN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ lite_index_->FetchHits(
+ term_id, section_restrict_mask_,
+ /*only_from_prefix_sections=*/false,
+ /*score_by=*/
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE,
+ /*namespace_checker=*/nullptr, &cached_hits_,
+ need_hit_term_frequency_ ? &cached_hit_term_frequency_ : nullptr);
+ cached_hits_idx_ = 0;
+ return libtextclassifier3::Status::OK;
+}
+
+std::string DocHitInfoIteratorTermLiteExact::ToString() const {
+ return absl_ports::StrCat(SectionIdMaskToString(section_restrict_mask_), ":",
+ term_);
+}
+
+libtextclassifier3::Status
+DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
+ // Take union of lite terms.
+ int term_len = term_.length();
+ int terms_matched = 0;
+ for (LiteIndex::PrefixIterator it = lite_index_->FindTermPrefixes(term_);
+ it.IsValid(); it.Advance()) {
+ bool exact_match = strlen(it.GetKey()) == term_len;
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t term_id,
+ term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
+ lite_index_->FetchHits(
+ term_id, section_restrict_mask_,
+ /*only_from_prefix_sections=*/!exact_match,
+ /*score_by=*/
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE,
+ /*namespace_checker=*/nullptr, &cached_hits_,
+ need_hit_term_frequency_ ? &cached_hit_term_frequency_ : nullptr);
+ ++terms_matched;
+ }
+ if (terms_matched > 1) {
+ SortAndDedupeDocumentIds();
+ }
+ cached_hits_idx_ = 0;
+ return libtextclassifier3::Status::OK;
+}
+
+void DocHitInfoIteratorTermLitePrefix::SortDocumentIds() {
+ // Re-sort cached document_ids and merge sections.
+ if (!need_hit_term_frequency_) {
+ // If we don't need to also sort cached_hit_term_frequency_ along with
+ // cached_hits_, then just simply sort cached_hits_.
+    std::sort(cached_hits_.begin(), cached_hits_.end());
+ } else {
+ // Sort cached_hit_term_frequency_ along with cached_hits_.
+ std::vector<int> indices(cached_hits_.size());
+ std::iota(indices.begin(), indices.end(), 0);
+ std::sort(indices.begin(), indices.end(), [this](int i, int j) {
+ return cached_hits_[i] < cached_hits_[j];
+ });
+ // Now indices is a map from sorted index to current index. In other words,
+ // the sorted cached_hits_[i] should be the current cached_hits_[indices[i]]
+ // for every valid i.
+ std::vector<bool> done(indices.size());
+ // Apply permutation
+ for (int i = 0; i < indices.size(); ++i) {
+ if (done[i]) {
+ continue;
+ }
+ done[i] = true;
+ int curr = i;
+ int next = indices[i];
+ // Since every finite permutation is formed by disjoint cycles, we can
+ // start with the current element, at index i, and swap the element at
+ // this position with whatever element that *should* be here. Then,
+ // continue to swap the original element, at its updated positions, with
+ // the element that should be occupying that position until the original
+ // element has reached *its* correct position. This completes applying the
+ // single cycle in the permutation.
+ while (next != i) {
+ std::swap(cached_hits_[curr], cached_hits_[next]);
+ std::swap(cached_hit_term_frequency_[curr],
+ cached_hit_term_frequency_[next]);
+ done[next] = true;
+ curr = next;
+ next = indices[next];
+ }
+ }
+ }
+}
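
The cycle-walking loop above is the standard trick for applying a precomputed permutation in place while keeping two parallel arrays aligned. A minimal self-contained sketch of the same technique, using a plain int/char pair instead of DocHitInfo and Hit::TermFrequencyArray:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <utility>
#include <vector>

int main() {
  std::vector<int> keys = {3, 1, 2};
  std::vector<char> payload = {'c', 'a', 'b'};

  // indices[i] names the current position of the element that belongs at
  // sorted position i.
  std::vector<int> indices(keys.size());
  std::iota(indices.begin(), indices.end(), 0);
  std::sort(indices.begin(), indices.end(),
            [&keys](int i, int j) { return keys[i] < keys[j]; });

  // Walk each disjoint cycle of the permutation, swapping both arrays in
  // lockstep so every payload stays attached to its key.
  std::vector<bool> done(indices.size(), false);
  for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
    if (done[i]) continue;
    done[i] = true;
    int curr = i;
    int next = indices[i];
    while (next != i) {
      std::swap(keys[curr], keys[next]);
      std::swap(payload[curr], payload[next]);
      done[next] = true;
      curr = next;
      next = indices[next];
    }
  }
  for (std::size_t k = 0; k < keys.size(); ++k) {
    std::cout << keys[k] << payload[k] << " ";  // Prints "1a 2b 3c ".
  }
  return 0;
}
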
+
+void DocHitInfoIteratorTermLitePrefix::SortAndDedupeDocumentIds() {
+ SortDocumentIds();
+ int idx = 0;
+ for (int i = 1; i < cached_hits_.size(); ++i) {
+ const DocHitInfo& hit_info = cached_hits_[i];
+ DocHitInfo& collapsed_hit_info = cached_hits_[idx];
+ if (collapsed_hit_info.document_id() == hit_info.document_id()) {
+ SectionIdMask curr_mask = hit_info.hit_section_ids_mask();
+ collapsed_hit_info.MergeSectionsFrom(curr_mask);
+ if (need_hit_term_frequency_) {
+ Hit::TermFrequencyArray& collapsed_term_frequency =
+ cached_hit_term_frequency_[idx];
+ while (curr_mask) {
+ SectionId section_id = __builtin_ctzll(curr_mask);
+ collapsed_term_frequency[section_id] =
+ cached_hit_term_frequency_[i][section_id];
+ curr_mask &= ~(UINT64_C(1) << section_id);
+ }
+ }
+ } else {
+ // New document_id.
+ ++idx;
+ cached_hits_[idx] = hit_info;
+ if (need_hit_term_frequency_) {
+ cached_hit_term_frequency_[idx] = cached_hit_term_frequency_[i];
+ }
+ }
+ }
+ // idx points to last doc hit info.
+ cached_hits_.resize(idx + 1);
+ if (need_hit_term_frequency_) {
+ cached_hit_term_frequency_.resize(idx + 1);
+ }
+}
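
SortAndDedupeDocumentIds() compacts a sorted run in place: equal document ids collapse into one entry whose section mask is the union of the duplicates. A minimal sketch of that collapse, assuming hits are simple (doc_id, section_mask) pairs already sorted by doc_id (the real code also copies per-section term frequencies):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct HitInfo {
  int doc_id;
  uint64_t section_mask;
};

int main() {
  std::vector<HitInfo> hits = {{1, 0x1}, {1, 0x4}, {2, 0x2}};
  std::size_t idx = 0;
  for (std::size_t i = 1; i < hits.size(); ++i) {
    if (hits[idx].doc_id == hits[i].doc_id) {
      hits[idx].section_mask |= hits[i].section_mask;  // Merge sections.
    } else {
      hits[++idx] = hits[i];  // New doc_id; compact it forward.
    }
  }
  hits.resize(idx + 1);
  for (const HitInfo& h : hits) {
    std::cout << h.doc_id << ":" << h.section_mask << " ";  // "1:5 2:2 ".
  }
  return 0;
}
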
+
+std::string DocHitInfoIteratorTermLitePrefix::ToString() const {
+ return absl_ports::StrCat(SectionIdMaskToString(section_restrict_mask_), ":",
+ term_, "*");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
new file mode 100644
index 0000000..7facd88
--- /dev/null
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -0,0 +1,173 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_
+
+#include <array>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+class DocHitInfoIteratorTermLite : public DocHitInfoLeafIterator {
+ public:
+ explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec,
+ LiteIndex* lite_index,
+ const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
+ SectionIdMask section_restrict_mask,
+ bool need_hit_term_frequency)
+ : term_(term),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length),
+ lite_index_(lite_index),
+ cached_hits_idx_(-1),
+ term_id_codec_(term_id_codec),
+ num_advance_calls_(0),
+ section_restrict_mask_(section_restrict_mask),
+ need_hit_term_frequency_(need_hit_term_frequency) {}
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ CallStats GetCallStats() const override {
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/num_advance_calls_,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/0);
+ }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (cached_hits_idx_ == -1 || cached_hits_idx_ >= cached_hits_.size()) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
+ std::array<Hit::TermFrequency, kTotalNumSections> section_term_frequencies =
+ {Hit::kNoTermFrequency};
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctzll(section_mask_copy);
+ if (need_hit_term_frequency_) {
+ section_term_frequencies.at(section_id) =
+ cached_hit_term_frequency_.at(cached_hits_idx_)[section_id];
+ }
+ section_mask_copy &= ~(UINT64_C(1) << section_id);
+ }
+ TermMatchInfo term_stats(term_, section_mask,
+ std::move(section_term_frequencies));
+
+ for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
+ if (cur_term_stats.term == term_stats.term) {
+ // Same docId and same term, we don't need to add the term and the term
+ // frequency should always be the same
+ return;
+ }
+ }
+ matched_terms_stats->push_back(std::move(term_stats));
+ }
+
+ protected:
+ // Add DocHitInfos corresponding to term_ to cached_hits_.
+ //
+ // Returns:
+ // - OK, on success
+ // - NOT_FOUND if no term matching term_ was found in the lexicon.
+ // - INVALID_ARGUMENT if unable to properly encode the termid
+ virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
+
+ const std::string term_;
+ // The start index of the given term in the search query
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query
+ int unnormalized_term_length_;
+ LiteIndex* const lite_index_;
+ // Stores hits retrieved from the index. This may only be a subset of the hits
+ // that are present in the index. Current value pointed to by the Iterator is
+ // tracked by cached_hits_idx_.
+ std::vector<DocHitInfo> cached_hits_;
+ std::vector<Hit::TermFrequencyArray> cached_hit_term_frequency_;
+ int cached_hits_idx_;
+ const TermIdCodec* term_id_codec_;
+ int num_advance_calls_;
+ // Mask indicating which sections hits should be considered for.
+ // Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired.
+ const SectionIdMask section_restrict_mask_;
+ const bool need_hit_term_frequency_;
+};
+
+class DocHitInfoIteratorTermLiteExact : public DocHitInfoIteratorTermLite {
+ public:
+ explicit DocHitInfoIteratorTermLiteExact(const TermIdCodec* term_id_codec,
+ LiteIndex* lite_index,
+ const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
+ SectionIdMask section_id_mask,
+ bool need_hit_term_frequency)
+ : DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
+ term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency) {}
+
+ std::string ToString() const override;
+
+ protected:
+ libtextclassifier3::Status RetrieveMoreHits() override;
+};
+
+class DocHitInfoIteratorTermLitePrefix : public DocHitInfoIteratorTermLite {
+ public:
+ explicit DocHitInfoIteratorTermLitePrefix(const TermIdCodec* term_id_codec,
+ LiteIndex* lite_index,
+ const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
+ SectionIdMask section_id_mask,
+ bool need_hit_term_frequency)
+ : DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
+ term_start_index, unnormalized_term_length,
+ section_id_mask, need_hit_term_frequency) {}
+
+ std::string ToString() const override;
+
+ protected:
+ libtextclassifier3::Status RetrieveMoreHits() override;
+
+ private:
+  // After retrieving DocHitInfos from the index, the results may contain
+  // multiple DocHitInfos for the same docid, e.g. a DocHitInfo for docid 1
+  // and "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos
+  // should be merged.
+ void SortDocumentIds();
+ void SortAndDedupeDocumentIds();
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_
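
PopulateMatchedTermsStats() above visits only the set bits of a section mask: __builtin_ctzll (a GCC/Clang builtin, which the surrounding code already relies on) returns the index of the lowest set bit, which is processed and then cleared. A minimal sketch of that iteration pattern:

#include <cstdint>
#include <iostream>

int main() {
  uint64_t mask =
      (UINT64_C(1) << 2) | (UINT64_C(1) << 5) | (UINT64_C(1) << 63);
  while (mask) {
    int section_id = __builtin_ctzll(mask);  // Index of the lowest set bit.
    std::cout << "section " << section_id << "\n";  // Prints 2, 5, 63.
    mask &= ~(UINT64_C(1) << section_id);  // Clear that bit and continue.
  }
  return 0;
}

This loop runs once per set section rather than once per possible section, which matters when masks are sparse.
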
diff --git a/icing/legacy/index/icing-lite-index-header.h b/icing/index/lite/lite-index-header.h
index ac2d3c0..75de8fa 100644
--- a/icing/legacy/index/icing-lite-index-header.h
+++ b/icing/index/lite/lite-index-header.h
@@ -15,16 +15,19 @@
#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
+#include <cstddef>
+#include <cstdint>
+
#include "icing/legacy/core/icing-string-util.h"
-#include "icing/legacy/index/icing-common-types.h"
+#include "icing/store/document-id.h"
namespace icing {
namespace lib {
// A wrapper around the actual mmapped header data.
-class IcingLiteIndex_Header {
+class LiteIndex_Header {
public:
- virtual ~IcingLiteIndex_Header() = default;
+ virtual ~LiteIndex_Header() = default;
// Returns true if the magic of the header matches the hard-coded magic
// value associated with this header format.
@@ -47,10 +50,17 @@ class IcingLiteIndex_Header {
virtual void Reset() = 0;
};
-class IcingLiteIndex_HeaderImpl : public IcingLiteIndex_Header {
+class LiteIndex_HeaderImpl : public LiteIndex_Header {
public:
struct HeaderData {
- static const uint32_t kMagic = 0x6dfba6a0;
+ static uint32_t GetCurrentMagic(
+ bool include_property_existence_metadata_hits) {
+ if (!include_property_existence_metadata_hits) {
+ return 0x01c61418;
+ } else {
+ return 0x56e07d5b;
+ }
+ }
uint32_t lite_index_crc;
uint32_t magic;
@@ -66,10 +76,15 @@ class IcingLiteIndex_HeaderImpl : public IcingLiteIndex_Header {
uint32_t searchable_end;
};
- explicit IcingLiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {}
+ explicit LiteIndex_HeaderImpl(HeaderData *hdr,
+ bool include_property_existence_metadata_hits)
+ : hdr_(hdr),
+ include_property_existence_metadata_hits_(
+ include_property_existence_metadata_hits) {}
bool check_magic() const override {
- return hdr_->magic == HeaderData::kMagic;
+ return hdr_->magic == HeaderData::GetCurrentMagic(
+ include_property_existence_metadata_hits_);
}
uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; }
@@ -96,16 +111,18 @@ class IcingLiteIndex_HeaderImpl : public IcingLiteIndex_Header {
void Reset() override {
hdr_->lite_index_crc = 0;
- hdr_->magic = HeaderData::kMagic;
- hdr_->last_added_docid = kIcingInvalidDocId;
+ hdr_->magic =
+ HeaderData::GetCurrentMagic(include_property_existence_metadata_hits_);
+ hdr_->last_added_docid = kInvalidDocumentId;
hdr_->cur_size = 0;
hdr_->searchable_end = 0;
}
private:
HeaderData *hdr_;
+ bool include_property_existence_metadata_hits_;
};
-static_assert(24 == sizeof(IcingLiteIndex_HeaderImpl::HeaderData),
+static_assert(24 == sizeof(LiteIndex_HeaderImpl::HeaderData),
"sizeof(HeaderData) != 24");
} // namespace lib
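
GetCurrentMagic() ties the on-disk header format to the property-existence feature flag: flipping the flag changes the expected magic, so check_magic() fails and the index is rebuilt rather than misread. A minimal sketch of that versioning scheme, reusing the two magic constants from this header:

#include <cstdint>
#include <iostream>

uint32_t GetCurrentMagic(bool include_metadata_hits) {
  return include_metadata_hits ? 0x56e07d5b : 0x01c61418;
}

struct Header {
  uint32_t magic;
};

bool CheckMagic(const Header& hdr, bool include_metadata_hits) {
  return hdr.magic == GetCurrentMagic(include_metadata_hits);
}

int main() {
  Header hdr{GetCurrentMagic(/*include_metadata_hits=*/false)};
  // Re-opening with the flag flipped fails the magic check, forcing a
  // rebuild instead of misreading the old format.
  std::cout << CheckMagic(hdr, /*include_metadata_hits=*/false) << " "
            << CheckMagic(hdr, /*include_metadata_hits=*/true) << "\n";  // 1 0
  return 0;
}
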
diff --git a/icing/legacy/index/icing-lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 4bf0d38..7e6c076 100644
--- a/icing/legacy/index/icing-lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -12,13 +12,31 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/legacy/index/icing-lite-index-options.h"
+#include "icing/index/lite/lite-index-options.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
namespace icing {
namespace lib {
namespace {
+constexpr int kIcingMaxVariantsPerToken = 10;  // Max index variants per token
+
+constexpr size_t kIcingMaxSearchableDocumentSize = (1u << 16) - 1; // 64K
+// Max num tokens per document. 64KB is our original maximum (searchable)
+// document size. We clip if document exceeds this.
+constexpr uint32_t kIcingMaxNumTokensPerDoc =
+ kIcingMaxSearchableDocumentSize / 5;
+constexpr uint32_t kIcingMaxNumHitsPerDocument =
+ kIcingMaxNumTokensPerDoc * kIcingMaxVariantsPerToken;
+
uint32_t CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes) {
constexpr uint32_t kHitBufferSlopMult = 2;
@@ -27,7 +45,7 @@ uint32_t CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes) {
// TODO(b/111690435) Move LiteIndex::Element to a separate file so that this
// can use sizeof(LiteIndex::Element)
uint32_t hit_capacity_elts_with_slop =
- hit_buffer_want_merge_bytes / sizeof(uint64_t);
+ hit_buffer_want_merge_bytes / sizeof(TermIdHitPair);
// Add some slop for index variants on top of max num tokens.
hit_capacity_elts_with_slop += kIcingMaxNumHitsPerDocument;
hit_capacity_elts_with_slop *= kHitBufferSlopMult;
@@ -51,10 +69,16 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
-IcingLiteIndexOptions::IcingLiteIndexOptions(
- const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes)
+LiteIndexOptions::LiteIndexOptions(
+ const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing, uint32_t hit_buffer_sort_threshold_bytes,
+ bool include_property_existence_metadata_hits)
: filename_base(filename_base),
- hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes) {
+ hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
+ hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes),
+ include_property_existence_metadata_hits(
+ include_property_existence_metadata_hits) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
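
CalculateHitBufferSize() converts the merge threshold from bytes to elements, adds slop for the worst-case hits of a single document, and doubles the result. A minimal sketch of that arithmetic, assuming a 1 MiB merge threshold and the 8-byte element size implied by sizeof(TermIdHitPair::Value):

#include <cstdint>
#include <iostream>

int main() {
  constexpr uint32_t kHitBufferSlopMult = 2;
  constexpr uint32_t kMaxVariantsPerToken = 10;
  constexpr uint32_t kMaxSearchableDocumentSize = (1u << 16) - 1;  // 64K
  constexpr uint32_t kMaxNumTokensPerDoc = kMaxSearchableDocumentSize / 5;
  constexpr uint32_t kMaxNumHitsPerDocument =
      kMaxNumTokensPerDoc * kMaxVariantsPerToken;
  constexpr uint32_t kSizeofElement = 8;  // assumed sizeof(TermIdHitPair)

  uint32_t hit_buffer_want_merge_bytes = 1024 * 1024;  // assumed 1 MiB
  uint32_t elts = hit_buffer_want_merge_bytes / kSizeofElement;  // 131072
  elts += kMaxNumHitsPerDocument;  // Slop: worst-case hits of one doc.
  elts *= kHitBufferSlopMult;
  std::cout << elts << "\n";  // 131072 + 131070 = 262142, doubled: 524284
  return 0;
}
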
diff --git a/icing/legacy/index/icing-lite-index-options.h b/icing/index/lite/lite-index-options.h
index 2922621..8b03449 100644
--- a/icing/legacy/index/icing-lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -15,20 +15,25 @@
#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
-#include "icing/legacy/index/icing-common-types.h"
+#include <cstdint>
+#include <string>
+
#include "icing/legacy/index/icing-dynamic-trie.h"
namespace icing {
namespace lib {
-struct IcingLiteIndexOptions {
- IcingLiteIndexOptions() = default;
- // Creates IcingLiteIndexOptions based off of the specified parameters. All
+struct LiteIndexOptions {
+ LiteIndexOptions() = default;
+ // Creates LiteIndexOptions based off of the specified parameters. All
// other fields are calculated based on the value of
// hit_buffer_want_merge_bytes and the logic in CalculateHitBufferSize and
// CalculateTrieOptions.
- IcingLiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes);
+ LiteIndexOptions(const std::string& filename_base,
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes,
+ bool include_property_existence_metadata_hits = false);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -36,6 +41,9 @@ struct IcingLiteIndexOptions {
std::string filename_base;
uint32_t hit_buffer_want_merge_bytes = 0;
uint32_t hit_buffer_size = 0;
+ bool hit_buffer_sort_at_indexing = false;
+ uint32_t hit_buffer_sort_threshold_bytes = 0;
+ bool include_property_existence_metadata_hits = false;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
new file mode 100644
index 0000000..3f9cc93
--- /dev/null
+++ b/icing/index/lite/lite-index.cc
@@ -0,0 +1,716 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/lite/lite-index.h"
+
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/index/term-property-id.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Point at which we declare the trie full.
+constexpr double kTrieFullFraction = 0.95;
+
+std::string MakeHitBufferFilename(const std::string& filename_base) {
+ return filename_base + "hb";
+}
+
+size_t header_size() { return sizeof(LiteIndex_HeaderImpl::HeaderData); }
+
+} // namespace
+
+const TermIdHitPair::Value TermIdHitPair::kInvalidValue =
+ TermIdHitPair(0, Hit()).value();
+
+libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> LiteIndex::Create(
+ const LiteIndex::Options& options, const IcingFilesystem* filesystem) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+
+ std::unique_ptr<LiteIndex> lite_index =
+ std::unique_ptr<LiteIndex>(new LiteIndex(options, filesystem));
+ ICING_RETURN_IF_ERROR(lite_index->Initialize());
+ return std::move(lite_index);
+}
+
+// options.hit_buffer_size is the maximum size in elements. An appropriate
+// lexicon and display mapping size will be chosen based on the hit buffer
+// size.
+LiteIndex::LiteIndex(const LiteIndex::Options& options,
+ const IcingFilesystem* filesystem)
+ : hit_buffer_(*filesystem),
+ hit_buffer_crc_(0),
+ lexicon_(options.filename_base + "lexicon", MakeTrieRuntimeOptions(),
+ filesystem),
+ header_mmap_(false, MAP_SHARED),
+ options_(options),
+ filesystem_(filesystem) {}
+
+LiteIndex::~LiteIndex() {
+ if (initialized()) {
+ libtextclassifier3::Status unused = PersistToDisk();
+ }
+}
+
+IcingDynamicTrie::RuntimeOptions LiteIndex::MakeTrieRuntimeOptions() {
+ return IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc);
+}
+
+libtextclassifier3::Status LiteIndex::Initialize() {
+ // Size of hit buffer's header struct, rounded up to the nearest number of
+ // system memory pages.
+ const size_t header_padded_size =
+ IcingMMapper::page_aligned_size(header_size());
+
+ // Variable declarations cannot cross goto jumps, so declare these up top.
+ libtextclassifier3::Status status;
+ uint64_t file_size;
+ IcingTimer timer;
+
+ absl_ports::unique_lock l(&mutex_);
+ if (!lexicon_.CreateIfNotExist(options_.lexicon_options) ||
+ !lexicon_.Init()) {
+ return absl_ports::InternalError("Failed to initialize lexicon trie");
+ }
+
+ hit_buffer_fd_.reset(filesystem_->OpenForWrite(
+ MakeHitBufferFilename(options_.filename_base).c_str()));
+ if (!hit_buffer_fd_.is_valid()) {
+ status = absl_ports::InternalError("Failed to open hit buffer file");
+ goto error;
+ }
+
+ file_size = filesystem_->GetFileSize(hit_buffer_fd_.get());
+ if (file_size == IcingFilesystem::kBadFileSize) {
+ status = absl_ports::InternalError("Failed to query hit buffer file size");
+ goto error;
+ }
+
+ if (file_size < header_padded_size) {
+ if (file_size != 0) {
+ status = absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Hit buffer had unexpected size %" PRIu64, file_size));
+ goto error;
+ }
+
+ ICING_VLOG(2) << "Creating new hit buffer";
+ // Make sure files are fresh.
+ if (!lexicon_.Remove() ||
+ !lexicon_.CreateIfNotExist(options_.lexicon_options) ||
+ !lexicon_.Init()) {
+ status =
+ absl_ports::InternalError("Failed to refresh lexicon during clear");
+ goto error;
+ }
+
+ // Create fresh hit buffer by first emptying the hit buffer file and then
+ // allocating header_padded_size of the cleared space.
+ if (!filesystem_->Truncate(hit_buffer_fd_.get(), 0) ||
+ !filesystem_->Truncate(hit_buffer_fd_.get(), header_padded_size)) {
+ status = absl_ports::InternalError("Failed to truncate hit buffer file");
+ goto error;
+ }
+
+ // Set up header.
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
+ header_ = std::make_unique<LiteIndex_HeaderImpl>(
+ reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
+ header_mmap_.address()),
+ options_.include_property_existence_metadata_hits);
+ header_->Reset();
+
+ if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
+ sizeof(TermIdHitPair::Value), header_->cur_size(),
+ options_.hit_buffer_size, &hit_buffer_crc_, true)) {
+ status = absl_ports::InternalError("Failed to initialize new hit buffer");
+ goto error;
+ }
+
+ UpdateChecksum();
+ } else {
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
+ header_ = std::make_unique<LiteIndex_HeaderImpl>(
+ reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
+ header_mmap_.address()),
+ options_.include_property_existence_metadata_hits);
+
+ if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
+ sizeof(TermIdHitPair::Value), header_->cur_size(),
+ options_.hit_buffer_size, &hit_buffer_crc_, true)) {
+ status = absl_ports::InternalError(
+ "Failed to re-initialize existing hit buffer");
+ goto error;
+ }
+
+ // Check integrity.
+ if (!header_->check_magic()) {
+ status = absl_ports::InternalError("Lite index header magic mismatch");
+ goto error;
+ }
+ Crc32 crc = ComputeChecksum();
+ if (crc.Get() != header_->lite_index_crc()) {
+ status = absl_ports::DataLossError(
+ IcingStringUtil::StringPrintf("Lite index crc check failed: %u vs %u",
+ crc.Get(), header_->lite_index_crc()));
+ goto error;
+ }
+ }
+
+ ICING_VLOG(2) << "Lite index init ok in " << timer.Elapsed() * 1000 << "ms";
+ return status;
+
+error:
+ header_ = nullptr;
+ header_mmap_.Unmap();
+ lexicon_.Close();
+ hit_buffer_crc_ = 0;
+ hit_buffer_.Reset();
+ hit_buffer_fd_.reset();
+ if (status.ok()) {
+ return absl_ports::InternalError(
+ "Error handling code ran but status was ok");
+ }
+ return status;
+}
+
+Crc32 LiteIndex::ComputeChecksum() {
+ IcingTimer timer;
+
+ // Update crcs.
+ uint32_t dependent_crcs[2];
+ hit_buffer_.UpdateCrc();
+ dependent_crcs[0] = hit_buffer_crc_;
+ dependent_crcs[1] = lexicon_.UpdateCrc();
+
+ // Compute the master crc.
+
+ // Header crc, excluding the actual crc field.
+ Crc32 all_crc(header_->CalculateHeaderCrc());
+ all_crc.Append(std::string_view(reinterpret_cast<const char*>(dependent_crcs),
+ sizeof(dependent_crcs)));
+ ICING_VLOG(2) << "Lite index crc computed in " << timer.Elapsed() * 1000
+ << "ms";
+
+ return all_crc;
+}
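
ComputeChecksum() seeds a CRC with the header checksum and then appends the sub-component checksums as raw bytes, so a change in any component bubbles up into the master value. A minimal sketch of the composition, with a tiny bitwise CRC32 standing in for util/crc32 (the real Crc32 class and its Append() live elsewhere in this repo):

#include <cstddef>
#include <cstdint>
#include <iostream>

// Tiny reflected CRC32 (poly 0xEDB88320); continues from a prior crc value.
uint32_t Crc32Update(uint32_t crc, const void* data, std::size_t len) {
  const uint8_t* p = static_cast<const uint8_t*>(data);
  crc = ~crc;
  for (std::size_t i = 0; i < len; ++i) {
    crc ^= p[i];
    for (int k = 0; k < 8; ++k) {
      crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
    }
  }
  return ~crc;
}

int main() {
  // Sub-component crcs (e.g. hit buffer and lexicon), appended to the header
  // crc so a change in any component changes the master crc. Values assumed.
  uint32_t dependent_crcs[2] = {0x12345678u, 0x9abcdef0u};
  uint32_t header_crc = 0x0badf00du;  // Stand-in for CalculateHeaderCrc().
  uint32_t master =
      Crc32Update(header_crc, dependent_crcs, sizeof(dependent_crcs));
  std::cout << std::hex << master << "\n";
  return 0;
}
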
+
+libtextclassifier3::Status LiteIndex::Reset() {
+ IcingTimer timer;
+
+ absl_ports::unique_lock l(&mutex_);
+ // TODO(b/140436942): When these components have been changed to return errors
+ // they should be propagated from here.
+ lexicon_.Clear();
+ hit_buffer_.Clear();
+ header_->Reset();
+ UpdateChecksum();
+
+ ICING_VLOG(2) << "Lite index clear in " << timer.Elapsed() * 1000 << "ms";
+ return libtextclassifier3::Status::OK;
+}
+
+void LiteIndex::Warm() {
+ absl_ports::shared_lock l(&mutex_);
+ hit_buffer_.Warm();
+ lexicon_.Warm();
+}
+
+libtextclassifier3::Status LiteIndex::PersistToDisk() {
+ absl_ports::unique_lock l(&mutex_);
+ bool success = true;
+ if (!lexicon_.Sync()) {
+ ICING_VLOG(1) << "Failed to sync the lexicon.";
+ success = false;
+ }
+ hit_buffer_.Sync();
+ UpdateChecksum();
+ header_mmap_.Sync();
+
+ return (success) ? libtextclassifier3::Status::OK
+ : absl_ports::InternalError(
+ "Unable to sync lite index components.");
+}
+
+void LiteIndex::UpdateChecksum() {
+ header_->set_lite_index_crc(ComputeChecksum().Get());
+}
+
+libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
+ const std::string& term, TermMatchType::Code term_match_type,
+ NamespaceId namespace_id) {
+ absl_ports::unique_lock l(&mutex_);
+ uint32_t tvi;
+ libtextclassifier3::Status status =
+ lexicon_.Insert(term.c_str(), "", &tvi, false);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Unable to add term " << term << " to lexicon!\n"
+ << status.error_message();
+ return status;
+ }
+ ICING_RETURN_IF_ERROR(UpdateTermPropertiesImpl(
+ tvi, term_match_type == TermMatchType::PREFIX, namespace_id));
+ return tvi;
+}
+
+libtextclassifier3::Status LiteIndex::UpdateTermProperties(
+ uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
+ absl_ports::unique_lock l(&mutex_);
+ return UpdateTermPropertiesImpl(tvi, hasPrefixHits, namespace_id);
+}
+
+libtextclassifier3::Status LiteIndex::UpdateTermPropertiesImpl(
+ uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
+ if (hasPrefixHits &&
+ !lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) {
+ return absl_ports::ResourceExhaustedError(
+ "Insufficient disk space to create prefix property!");
+ }
+
+ if (!lexicon_.SetProperty(tvi, GetNamespacePropertyId(namespace_id))) {
+ return absl_ports::ResourceExhaustedError(
+ "Insufficient disk space to create namespace property!");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
+ absl_ports::unique_lock l(&mutex_);
+ if (is_full()) {
+ return absl_ports::ResourceExhaustedError("Hit buffer is full!");
+ }
+
+ TermIdHitPair term_id_hit_pair(term_id, hit);
+ uint32_t cur_size = header_->cur_size();
+ TermIdHitPair::Value* valp =
+ hit_buffer_.GetMutableMem<TermIdHitPair::Value>(cur_size, 1);
+ if (valp == nullptr) {
+ return absl_ports::ResourceExhaustedError(
+ "Allocating more space in hit buffer failed!");
+ }
+ *valp = term_id_hit_pair.value();
+ header_->set_cur_size(cur_size + 1);
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
+ const std::string& term) const {
+ absl_ports::shared_lock l(&mutex_);
+ char dummy;
+ uint32_t tvi;
+ if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Could not find ", term, " in the lexicon."));
+ }
+ return tvi;
+}
+
+void LiteIndex::ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const {
+ // Check sections.
+ if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
+ return;
+ }
+ // Check prefix section only.
+ if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
+ return;
+ }
+ // Check whether this Hit is desired.
+ // TODO(b/230553264) Move common logic into helper function once we support
+ // score term by prefix_hit in lite_index.
+ DocumentId document_id = hit.document_id();
+ bool is_new_document = document_id != last_document_id;
+ if (is_new_document) {
+ last_document_id = document_id;
+ is_last_document_desired =
+ suggestion_result_checker == nullptr ||
+ suggestion_result_checker->BelongsToTargetResults(document_id,
+ hit.section_id());
+ }
+ if (!is_last_document_desired) {
+ // The document is removed or expired or not desired.
+ return;
+ }
+
+ // Score the hit by the strategy
+ switch (score_by) {
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
+ total_score_out = 1;
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT:
+ if (is_new_document) {
+ ++total_score_out;
+ }
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY:
+ if (hit.has_term_frequency()) {
+ total_score_out += hit.term_frequency();
+ } else {
+ ++total_score_out;
+ }
+ break;
+ }
+
+  // Append the Hit, or update the last appended hit's section mask.
+ if (is_new_document && hits_out != nullptr) {
+ hits_out->push_back(DocHitInfo(document_id));
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->push_back(Hit::TermFrequencyArray());
+ }
+ }
+ if (hits_out != nullptr) {
+ hits_out->back().UpdateSection(hit.section_id());
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
+ }
+ }
+}
+
+int LiteIndex::FetchHits(
+ uint32_t term_id, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) {
+ bool need_sort_at_querying = false;
+ {
+ absl_ports::shared_lock l(&mutex_);
+
+ // We sort here when:
+ // 1. We don't enable sorting at indexing time (i.e. we sort at querying
+ // time), and there is an unsorted tail portion. OR
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold,
+ // regardless of whether or not hit_buffer_sort_at_indexing is enabled.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ need_sort_at_querying = NeedSortAtQuerying();
+ }
+ if (need_sort_at_querying) {
+ absl_ports::unique_lock l(&mutex_);
+ IcingTimer timer;
+
+ // Transition from shared_lock to unique_lock is safe here because it
+ // doesn't hurt to sort again if sorting was done already by another thread
+ // after need_sort_at_querying is evaluated.
+ // We check need_sort_at_querying to improve query concurrency as threads
+ // can avoid acquiring the unique lock if no sorting is needed.
+ SortHitsImpl();
+
+ if (options_.hit_buffer_sort_at_indexing) {
+ // This is the second case for sort. Log as this should be a very rare
+ // occasion.
+ ICING_LOG(WARNING) << "Sorting HitBuffer at querying time when "
+ "hit_buffer_sort_at_indexing is enabled. Sort and "
+ "merge HitBuffer in "
+ << timer.Elapsed() * 1000 << " ms.";
+ }
+ }
+
+  // This downgrade from a unique_lock to a shared_lock is safe because we're
+ // searching for the term in the searchable (sorted) section of the HitBuffer
+ // only in Seek().
+ // Any operations that might execute in between the transition of downgrading
+ // the lock here are guaranteed not to alter the searchable section (or the
+ // LiteIndex) due to a global lock in IcingSearchEngine.
+ absl_ports::shared_lock l(&mutex_);
+
+ // Search in the HitBuffer array for Hits with the corresponding term_id.
+ // Hits are added in increasing order of doc ids, so hits that get appended
+ // later have larger docIds. This means that:
+ // 1. Hits in the unsorted tail will have larger docIds than hits in the
+ // sorted portion.
+ // 2. Hits at the end of the unsorted tail will have larger docIds than hits
+ // in the front of the tail.
+ // We want to retrieve hits in descending order of docIds. Therefore we should
+ // search by doing:
+ // 1. Linear search first in reverse iteration order over the unsorted tail
+ // portion.
+ // 2. Followed by binary search on the sorted portion.
+ const TermIdHitPair* array = hit_buffer_.array_cast<TermIdHitPair>();
+
+ DocumentId last_document_id = kInvalidDocumentId;
+ // Record whether the last document belongs to the given namespaces.
+ bool is_last_document_desired = false;
+ int total_score = 0;
+
+ // Linear search over unsorted tail in reverse iteration order.
+ // This should only be performed when hit_buffer_sort_at_indexing is enabled.
+ // When disabled, the entire HitBuffer should be sorted already and only
+ // binary search is needed.
+ if (options_.hit_buffer_sort_at_indexing) {
+ uint32_t unsorted_length = header_->cur_size() - header_->searchable_end();
+ for (uint32_t i = 1; i <= unsorted_length; ++i) {
+ TermIdHitPair term_id_hit_pair = array[header_->cur_size() - i];
+ if (term_id_hit_pair.term_id() == term_id) {
+ // We've found a matched hit.
+ const Hit& matched_hit = term_id_hit_pair.hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors
+ // are non-null.
+ ScoreAndAppendFetchedHit(matched_hit, section_id_mask,
+ only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id,
+ is_last_document_desired, total_score,
+ hits_out, term_frequency_out);
+ }
+ }
+ }
+
+ // Do binary search over the sorted section and repeat the above steps.
+ TermIdHitPair target_term_id_hit_pair(
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
+ for (const TermIdHitPair* ptr = std::lower_bound(
+ array, array + header_->searchable_end(), target_term_id_hit_pair);
+ ptr < array + header_->searchable_end(); ++ptr) {
+ if (ptr->term_id() != term_id) {
+ // We've processed all matches. Stop iterating further.
+ break;
+ }
+
+ const Hit& matched_hit = ptr->hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors are
+ // non-null.
+ ScoreAndAppendFetchedHit(
+ matched_hit, section_id_mask, only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id, is_last_document_desired,
+ total_score, hits_out, term_frequency_out);
+ }
+ return total_score;
+}
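
FetchHits() therefore treats the hit buffer as a sorted prefix plus an append-only tail: the tail is scanned linearly in reverse (newest, largest doc ids first) and the prefix is binary searched. A minimal sketch of that two-phase lookup over assumed (term_id, key) pairs, where the real code seeks with a sentinel hit rather than key 0:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using TermIdHitPair = std::pair<uint32_t, uint32_t>;  // (term_id, hit key)

int main() {
  // Prefix [0, 4) is sorted; the last two entries are the unsorted tail.
  std::vector<TermIdHitPair> buf = {{1, 9}, {2, 3}, {2, 7}, {5, 1},
                                    {9, 2}, {2, 8}};
  std::size_t searchable_end = 4;
  uint32_t term_id = 2;

  // Phase 1: reverse scan of the unsorted tail (newest hits first).
  for (std::size_t i = 1; i <= buf.size() - searchable_end; ++i) {
    const TermIdHitPair& p = buf[buf.size() - i];
    if (p.first == term_id) std::cout << "tail hit " << p.second << "\n";
  }

  // Phase 2: binary search for the first entry with this term_id, then walk
  // forward while the term_id still matches.
  auto it = std::lower_bound(buf.begin(), buf.begin() + searchable_end,
                             TermIdHitPair{term_id, 0});
  for (; it != buf.begin() + searchable_end && it->first == term_id; ++it) {
    std::cout << "sorted hit " << it->second << "\n";
  }
  return 0;  // Prints: tail hit 8, sorted hit 3, sorted hit 7.
}
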
+
+libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
+ uint32_t term_id,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker) {
+ return FetchHits(term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false, score_by,
+ suggestion_result_checker,
+ /*hits_out=*/nullptr);
+}
+
+bool LiteIndex::is_full() const {
+ return (header_->cur_size() == options_.hit_buffer_size ||
+ lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction));
+}
+
+std::string LiteIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) {
+ absl_ports::unique_lock l(&mutex_);
+ std::string res;
+ std::string lexicon_info;
+ lexicon_.GetDebugInfo(verbosity, &lexicon_info);
+ IcingStringUtil::SStringAppendF(
+ &res, 0,
+ "curr_size: %u\n"
+ "hit_buffer_size: %u\n"
+ "last_added_document_id %u\n"
+ "searchable_end: %u\n"
+ "index_crc: %u\n"
+ "\n"
+ "lite_lexicon_info:\n%s\n",
+ header_->cur_size(), options_.hit_buffer_size,
+ header_->last_added_docid(), header_->searchable_end(),
+ ComputeChecksum().Get(), lexicon_info.c_str());
+ return res;
+}
+
+libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.lite_index_hit_buffer_size() == -1 ||
+ storage_info.lite_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of LiteIndex's members.");
+ }
+ // On initialization, we grow the file to a padded size first. So this size
+  // won't count towards the size taken up by elements.
+ size_t header_padded_size = IcingMMapper::page_aligned_size(header_size());
+ return storage_info.lite_index_hit_buffer_size() - header_padded_size +
+ storage_info.lite_index_lexicon_size();
+}
+
+IndexStorageInfoProto LiteIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ absl_ports::shared_lock l(&mutex_);
+ int64_t header_and_hit_buffer_file_size =
+ filesystem_->GetFileSize(hit_buffer_fd_.get());
+ storage_info.set_lite_index_hit_buffer_size(
+ IcingFilesystem::SanitizeFileSize(header_and_hit_buffer_file_size));
+ int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
+ if (lexicon_disk_usage != Filesystem::kBadFileSize) {
+ storage_info.set_lite_index_lexicon_size(lexicon_disk_usage);
+ } else {
+ storage_info.set_lite_index_lexicon_size(-1);
+ }
+ return storage_info;
+}
+
+void LiteIndex::SortHitsImpl() {
+  // Make the hit buffer searchable by sorting its unsorted tail.
+ uint32_t sort_len = header_->cur_size() - header_->searchable_end();
+ if (sort_len <= 0) {
+ return;
+ }
+ IcingTimer timer;
+
+ auto* array_start =
+ hit_buffer_.GetMutableMem<TermIdHitPair::Value>(0, header_->cur_size());
+ TermIdHitPair::Value* sort_start = array_start + header_->searchable_end();
+ std::sort(sort_start, array_start + header_->cur_size());
+
+ // Now merge with previous region. Since the previous region is already
+ // sorted and deduplicated, optimize the merge by skipping everything less
+ // than the new region's smallest value.
+ if (header_->searchable_end() > 0) {
+ std::inplace_merge(array_start, array_start + header_->searchable_end(),
+ array_start + header_->cur_size());
+ }
+ ICING_VLOG(2) << "Lite index sort and merge " << sort_len << " into "
+ << header_->searchable_end() << " in " << timer.Elapsed() * 1000
+ << "ms";
+
+ // Now the entire array is sorted.
+ header_->set_searchable_end(header_->cur_size());
+
+ // Update crc in-line.
+ UpdateChecksum();
+}
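
SortHitsImpl() only sorts the unsorted tail and then folds it into the already-sorted prefix with std::inplace_merge, which is cheaper than re-sorting the whole buffer on every call. A minimal sketch:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> buf = {1, 4, 9, 3, 2};  // [0, 3) sorted, tail {3, 2}
  int searchable_end = 3;
  // Sort only the tail, then merge the two sorted runs in place.
  std::sort(buf.begin() + searchable_end, buf.end());
  if (searchable_end > 0) {
    std::inplace_merge(buf.begin(), buf.begin() + searchable_end, buf.end());
  }
  for (int v : buf) std::cout << v << " ";  // Prints "1 2 3 4 9 ".
  return 0;
}
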
+
+libtextclassifier3::Status LiteIndex::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) {
+ absl_ports::unique_lock l(&mutex_);
+ header_->set_last_added_docid(new_last_added_document_id);
+ if (header_->cur_size() == 0) {
+ return libtextclassifier3::Status::OK;
+ }
+ // Sort the hits so that hits with the same term id will be grouped together,
+ // which helps later to determine which terms will be unused after compaction.
+ SortHitsImpl();
+ uint32_t new_size = 0;
+ uint32_t curr_term_id = 0;
+ uint32_t curr_tvi = 0;
+ std::unordered_set<uint32_t> tvi_to_delete;
+ for (uint32_t idx = 0; idx < header_->cur_size(); ++idx) {
+ TermIdHitPair term_id_hit_pair(
+ hit_buffer_.array_cast<TermIdHitPair>()[idx]);
+ if (idx == 0 || term_id_hit_pair.term_id() != curr_term_id) {
+ curr_term_id = term_id_hit_pair.term_id();
+ ICING_ASSIGN_OR_RETURN(TermIdCodec::DecodedTermInfo term_info,
+ term_id_codec->DecodeTermInfo(curr_term_id));
+ curr_tvi = term_info.tvi;
+ // Mark the property of the current term as not having hits in prefix
+ // section. The property will be set below if there are any valid hits
+ // from a prefix section.
+ lexicon_.ClearProperty(curr_tvi, GetHasHitsInPrefixSectionPropertyId());
+ // Add curr_tvi to tvi_to_delete. It will be removed from tvi_to_delete
+ // below if there are any valid hits pointing to that termid.
+ tvi_to_delete.insert(curr_tvi);
+ }
+ DocumentId new_document_id =
+ document_id_old_to_new[term_id_hit_pair.hit().document_id()];
+ if (new_document_id == kInvalidDocumentId) {
+ continue;
+ }
+ if (term_id_hit_pair.hit().is_in_prefix_section()) {
+ lexicon_.SetProperty(curr_tvi, GetHasHitsInPrefixSectionPropertyId());
+ }
+ tvi_to_delete.erase(curr_tvi);
+ TermIdHitPair new_term_id_hit_pair(
+ term_id_hit_pair.term_id(),
+ Hit::TranslateHit(term_id_hit_pair.hit(), new_document_id));
+ // Rewriting the hit_buffer in place.
+ // new_size is weakly less than idx so we are okay to overwrite the entry at
+ // new_size, and valp should never be nullptr since it is within the already
+ // allocated region of hit_buffer_.
+ TermIdHitPair::Value* valp =
+ hit_buffer_.GetMutableMem<TermIdHitPair::Value>(new_size++, 1);
+ *valp = new_term_id_hit_pair.value();
+ }
+ header_->set_cur_size(new_size);
+ header_->set_searchable_end(new_size);
+
+ // Delete unused terms.
+ std::unordered_set<std::string> terms_to_delete;
+ for (IcingDynamicTrie::Iterator term_iter(lexicon_, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ if (tvi_to_delete.find(term_iter.GetValueIndex()) != tvi_to_delete.end()) {
+ terms_to_delete.insert(term_iter.GetKey());
+ }
+ }
+ for (const std::string& term : terms_to_delete) {
+ // Mark "term" as deleted. This won't actually free space in the lexicon. It
+ // will simply make it impossible to Find "term" in subsequent calls (which
+ // saves an unnecessary search through the hit buffer). This is acceptable
+ // because the free space will eventually be reclaimed the next time that
+ // the lite index is merged with the main index.
+ if (!lexicon_.Delete(term)) {
+ return absl_ports::InternalError(
+ "Could not delete invalid terms in lite lexicon during compaction.");
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
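
The compaction loop in Optimize() above rewrites the hit buffer in place: each hit's document id is remapped through document_id_old_to_new, hits whose documents were deleted (mapped to an invalid id) are dropped, and survivors are written back at a cursor that never overtakes the read index. A minimal sketch of that remap-and-compact pass, with a hypothetical old-to-new mapping:

#include <cstddef>
#include <iostream>
#include <vector>

constexpr int kInvalidDocumentId = -1;

int main() {
  std::vector<int> hit_doc_ids = {0, 1, 1, 2, 3};
  // Old doc id -> new doc id; document 1 was deleted (assumed mapping).
  std::vector<int> old_to_new = {0, kInvalidDocumentId, 1, 2};

  std::size_t new_size = 0;
  for (std::size_t idx = 0; idx < hit_doc_ids.size(); ++idx) {
    int new_id = old_to_new[hit_doc_ids[idx]];
    if (new_id == kInvalidDocumentId) continue;  // Hit's document is gone.
    hit_doc_ids[new_size++] = new_id;  // new_size <= idx: safe to overwrite.
  }
  hit_doc_ids.resize(new_size);
  for (int id : hit_doc_ids) std::cout << id << " ";  // Prints "0 1 2 ".
  return 0;
}
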
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
new file mode 100644
index 0000000..288602a
--- /dev/null
+++ b/icing/index/lite/lite-index.h
@@ -0,0 +1,444 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A small index with continuous updates (doesn't need an explicit Flush
+// to persist) but has a higher possibility of corruption. It can always
+// detect corruption reliably.
+
+#ifndef ICING_INDEX_LITE_INDEX_H_
+#define ICING_INDEX_LITE_INDEX_H_
+
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/thread_annotations.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/lite/lite-index-options.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// The LiteIndex is go/thread-compatible. Operations on the same data member
+// object interfere with each other, unless they are guaranteed not to mutate
+// the object (in the case of LiteIndex, this means all const methods, as
+// well as FetchHits and ScoreHits).
+class LiteIndex {
+ public:
+  // Configuration options for the LiteIndex.
+ using Options = LiteIndexOptions;
+
+ // Offset for the LiteIndex_Header in the hit buffer mmap.
+ static constexpr uint32_t kHeaderFileOffset = 0;
+
+ // Updates checksum of subcomponents.
+ ~LiteIndex();
+
+ // Creates lite index from storage. The files will be created if they do not
+ // already exist.
+ //
+ // Returns:
+ // OK on success
+ // DATA_LOSS if the index was corrupted and cleared
+ // INTERNAL on I/O error
+ static libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> Create(
+ const Options& options, const IcingFilesystem* filesystem);
+
+ // Resets all internal members of the index. Returns OK if all operations were
+ // successful.
+ libtextclassifier3::Status Reset() ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Advises the OS to cache pages in the index, which will be accessed for a
+ // query soon.
+ void Warm() ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Syncs all modified files in the index to disk.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL on I/O error
+ libtextclassifier3::Status PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Returns term_id if term found, NOT_FOUND otherwise.
+ libtextclassifier3::StatusOr<uint32_t> GetTermId(
+ const std::string& term) const ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Returns an iterator for all terms for which 'prefix' is a prefix.
+ class PrefixIterator {
+ public:
+ explicit PrefixIterator(const IcingDynamicTrie::Iterator& delegate)
+ : delegate_(delegate) {}
+ bool IsValid() const { return delegate_.IsValid(); }
+
+ void Advance() { delegate_.Advance(); }
+
+ const char* GetKey() const { return delegate_.GetKey(); }
+
+ uint32_t GetValueIndex() const { return delegate_.GetValueIndex(); }
+
+ private:
+ IcingDynamicTrie::Iterator delegate_;
+ };
+
+ // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate any
+ // previously returned PrefixIterator.
+ PrefixIterator FindTermPrefixes(const std::string& prefix) const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str()));
+ }
+
+ // Inserts a term with its properties.
+ //
+ // Returns:
+ // A value index on success
+ // RESOURCE_EXHAUSTED if lexicon is full or no disk space is available
+ libtextclassifier3::StatusOr<uint32_t> InsertTerm(
+ const std::string& term, TermMatchType::Code term_match_type,
+ NamespaceId namespace_id) ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Updates term properties by setting hasPrefixHits and namespace id of the
+ // term.
+ //
+ // Returns:
+ // OK on success
+ // RESOURCE_EXHAUSTED if no disk space is available
+ libtextclassifier3::Status UpdateTermProperties(uint32_t tvi,
+ bool hasPrefixHits,
+ NamespaceId namespace_id)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Append hit to buffer. term_id must be encoded using the same term_id_codec
+ // supplied to the index constructor.
+ // RETURNS:
+ // - OK if hit was successfully added
+  //   - RESOURCE_EXHAUSTED if hit could not be added (either the hit buffer
+  //     or the file system capacity was reached).
+ libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Add all hits with term_id from the sections specified in section_id_mask,
+ // skipping hits in non-prefix sections if only_from_prefix_sections is true,
+ // to hits_out. If hits_out is nullptr, no hits will be added. The
+ // corresponding hit term frequencies will also not be added if
+ // term_frequency_out is nullptr.
+ //
+  // Only those hits which belong to the given namespaces will be counted and
+ // fetched. A nullptr namespace checker will disable this check.
+ //
+  // Returns the score of hits that would be added to hits_out according to
+ // given score_by.
+ int FetchHits(
+ uint32_t term_id, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out = nullptr)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Returns the hit count of the term.
+  // Only those hits which belong to the given namespaces will be counted.
+ libtextclassifier3::StatusOr<int> ScoreHits(
+ uint32_t term_id,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ bool empty() const ICING_LOCKS_EXCLUDED(mutex_) { return size() == 0; }
+
+ uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return size_impl();
+ }
+
+ bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return is_full() || size_impl() >= (options_.hit_buffer_want_merge_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
+
+ // Whether or not the HitBuffer's unsorted tail size exceeds the sort
+ // threshold.
+ bool HasUnsortedHitsExceedingSortThreshold() const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return HasUnsortedHitsExceedingSortThresholdImpl();
+ }
+
+ // Sort hits stored in the index.
+ void SortHits() ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
+ SortHitsImpl();
+  }
+
+ class const_iterator {
+ friend class LiteIndex;
+
+ public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = TermIdHitPair;
+ using reference = const value_type&;
+ using pointer = const value_type*;
+
+ const_iterator() : const_iterator(nullptr, -1, -1) {}
+
+ reference operator*() const { return start_[position_]; }
+
+ pointer operator->() const { return start_ + position_; }
+
+ const_iterator& operator++() {
+ if (++position_ >= end_position_) {
+ start_ = nullptr;
+ position_ = -1;
+ end_position_ = -1;
+ }
+ return *this;
+ }
+
+ const_iterator operator++(int) {
+ auto tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ bool operator!=(const const_iterator& rhs) { return !(*this == rhs); }
+
+ bool operator==(const const_iterator& rhs) {
+ return start_ == rhs.start_ && position_ == rhs.position_;
+ }
+
+ private:
+ explicit const_iterator(const TermIdHitPair* start, int position,
+ int end_position)
+ : start_(start), position_(position), end_position_(end_position) {}
+
+ const TermIdHitPair* start_;
+ int position_;
+ int end_position_;
+ };
+
+ const_iterator begin() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ // If the LiteIndex is empty, just return end().
+ return empty_impl()
+ ? end()
+ : const_iterator(hit_buffer_.array_cast<TermIdHitPair>(), 0,
+ header_->cur_size());
+ }
+
+ const_iterator end() const { return const_iterator(); }
+
+ constexpr static uint32_t max_hit_buffer_size() {
+ return std::numeric_limits<uint32_t>::max() / sizeof(TermIdHitPair);
+ }
+
+ // We keep track of the last added document_id. This is always the largest
+ // document_id that has been added because hits can only be added in order of
+ // increasing document_id.
+ DocumentId last_added_document_id() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return header_->last_added_docid();
+ }
+ void set_last_added_document_id(DocumentId document_id)
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
+ header_->set_last_added_docid(document_id);
+ }
+
+ // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate the reference
+ // returned here.
+ const IcingDynamicTrie& lexicon() const { return lexicon_; }
+
+  // Returns debug information for the index as a string.
+ // verbosity = BASIC, simplest debug information - size of lexicon, hit buffer
+ // verbosity = DETAILED, more detailed debug information from the lexicon.
+ std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity)
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Returns the byte size of all the elements held in the index. This excludes
+ // the size of any internal metadata of the index, e.g. the index's header.
+ //
+ // Returns:
+ // Byte size on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Takes the provided storage_info, populates the fields related to the lite
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(IndexStorageInfoProto storage_info) const
+ ICING_LOCKS_EXCLUDED(mutex_);
+
+ // Reduces internal file sizes by reclaiming space of deleted documents.
+ //
+ // This method also sets the last_added_docid of the index to
+ // new_last_added_document_id.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error, this indicates that the index may be in an
+ // invalid state and should be cleared.
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id)
+ ICING_LOCKS_EXCLUDED(mutex_);
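+
+  // Hypothetical call sketch (the variable names are illustrative
+  // assumptions, not defined in this header):
+  //
+  //   ICING_RETURN_IF_ERROR(lite_index->Optimize(
+  //       document_id_old_to_new, term_id_codec.get(),
+  //       /*new_last_added_document_id=*/new_last_docid));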
+
+ private:
+ static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
+
+ LiteIndex(const Options& options, const IcingFilesystem* filesystem);
+
+ // Initializes lite index from storage. Must be called exactly once after
+ // object construction.
+ //
+ // Returns:
+ // OK on success
+ // DATA_LOSS if the index was corrupted and cleared
+ // INTERNAL on I/O error
+ libtextclassifier3::Status Initialize() ICING_LOCKS_EXCLUDED(mutex_);
+
+ bool initialized() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_ != nullptr;
+ }
+
+ // Check if the hit buffer has reached its capacity.
+ bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_);
+
+ // Non-locking implementation for empty().
+ bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return size_impl() == 0;
+ }
+
+ // Non-locking implementation for size().
+ uint32_t size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size();
+ }
+
+  // Calculates the checksum of all sub-components of the LiteIndex.
+ Crc32 ComputeChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Sets the computed checksum in the header
+ void UpdateChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Non-locking implementation for UpdateTermProperties.
+ libtextclassifier3::Status UpdateTermPropertiesImpl(uint32_t tvi,
+ bool hasPrefixHits,
+ NamespaceId namespace_id)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // We need to sort during querying time when:
+ // 1. Sorting at indexing time is not enabled and there is an unsorted tail
+ // section in the HitBuffer.
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold, regardless
+ // of whether or not hit_buffer_sort_at_indexing is enabled. This is to
+  //    prevent performing a sequential search on a large unsorted tail section,
+ // which would result in bad query performance.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return HasUnsortedHitsExceedingSortThresholdImpl() ||
+ (!options_.hit_buffer_sort_at_indexing &&
+ header_->cur_size() - header_->searchable_end() > 0);
+ }
+
+  // Non-locking implementation for HasUnsortedHitsExceedingSortThreshold().
+ bool HasUnsortedHitsExceedingSortThresholdImpl() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size() - header_->searchable_end() >=
+ (options_.hit_buffer_sort_threshold_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
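+
+  // Worked example (assuming the 64-byte threshold used in the tests): with
+  // hit_buffer_sort_threshold_bytes = 64 and sizeof(TermIdHitPair::Value) = 8,
+  // the unsorted tail exceeds the threshold once it holds 64 / 8 = 8 or more
+  // hits.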
+
+ // Non-locking implementation for SortHits().
+ void SortHitsImpl() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Calculates and adds the score for a fetched hit to total_score_out, while
+  // updating last_document_id (which keeps track of the last added docId so
+  // far) and is_last_document_desired (which keeps track of whether that last
+  // added docId belongs to the query's desired namespace).
+ //
+ // Also appends the hit to hits_out and term_frequency_out if the vectors are
+ // not null.
+ void ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
+
+ // File descriptor that points to where the header and hit buffer are written
+ // to.
+ ScopedFd hit_buffer_fd_ ICING_GUARDED_BY(mutex_);
+
+ // Mmapped region past the header that stores the hits.
+ IcingArrayStorage hit_buffer_ ICING_GUARDED_BY(mutex_);
+
+ // Crc checksum of the hits, excludes the header.
+ uint32_t hit_buffer_crc_ ICING_GUARDED_BY(mutex_);
+
+ // Trie that maps indexed terms to their term id
+ IcingDynamicTrie lexicon_ ICING_GUARDED_BY(mutex_);
+
+ // TODO(b/140437260): Port over to MemoryMappedFile
+ // Memory mapped region of the underlying file that reflects the header.
+ IcingMMapper header_mmap_ ICING_GUARDED_BY(mutex_);
+
+ // Wrapper around the mmapped header that contains stats on the lite index.
+ std::unique_ptr<LiteIndex_Header> header_ ICING_GUARDED_BY(mutex_);
+
+ // Options used to initialize the LiteIndex.
+ const Options options_;
+
+ // TODO(b/139087650) Move to icing::Filesystem
+ const IcingFilesystem* const filesystem_;
+
+ // Used to provide reader and writer locks
+ mutable absl_ports::shared_mutex mutex_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_LITE_INDEX_H_
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
new file mode 100644
index 0000000..9811fa2
--- /dev/null
+++ b/icing/index/lite/lite-index_test.cc
@@ -0,0 +1,741 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/lite/lite-index.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/always-false-suggestion-result-checker-impl.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::SizeIs;
+
+class LiteIndexTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ index_dir_ = GetTestTempDir() + "/test_dir";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ void TearDown() override {
+ term_id_codec_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ std::string index_dir_;
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+constexpr NamespaceId kNamespace0 = 0;
+
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+  // At 64 bytes the unsorted tail can contain a max of 8 TermIdHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+  // Check that unsorted hits do not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+  // Check that hits are unsorted. Persist the data and pread the
+  // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(1));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
+ // Check that the hits are coming from section 0 and section 1.
+ EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+  // Check that hits are sorted after querying LiteIndex. Persist the data and
+  // pread the LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+}
+
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+  // At 64 bytes the unsorted tail can contain a max of 8 TermIdHitPairs.
+  // However, note that in these tests we're unable to sort hits after
+  // indexing, as sorting is performed by the string-section-indexing-handler
+  // after indexing all hits in an entire document, rather than after each
+  // AddHit() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+  // Check that unsorted hits do not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+  // Check that hits are unsorted. Persist the data and pread the
+  // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(1));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
+ // Check that the hits are coming from section 0 and section 1.
+ EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are still unsorted after querying LiteIndex because the
+ // HitBuffer unsorted size is still below the sort threshold, and we've
+ // enabled sort_at_indexing.
+  // Persist the data and perform a pread on the LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+  // At 64 bytes the unsorted tail can contain a max of 8 TermIdHitPairs.
+  // However, note that in these tests we're unable to sort hits after
+  // indexing, as sorting is performed by the string-section-indexing-handler
+  // after indexing all hits in an entire document, rather than after each
+  // AddHit() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add 14 hits and make sure that termIds are added in unsorted order.
+  // Documents should be inserted in order, as new incoming hits must have
+  // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ // Verify that the HitBuffer has not been sorted.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0)}
+ // bar: {(0, 0); (1, 0); (1, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0)}
+  // qux: {(0, 2); (2, 1)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(4));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(3));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits1[1].document_id(), Eq(2));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[2].document_id(), Eq(1));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[3].document_id(), Eq(0));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+  EXPECT_THAT(hits3[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+  // At 64 bytes the unsorted tail can contain a max of 8 TermIdHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+  // Create 4 hits for each of docs 0-4 -- 20 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit2(/*section_id=*/1, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit3(/*section_id=*/2, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 4
+ Hit doc4_hit0(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit1(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit2(/*section_id=*/1, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit3(/*section_id=*/2, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add hits and make sure that termIds are added in unsorted order.
+  // Documents should be inserted in order, as new incoming hits must have
+  // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+  // Adding 8 hits exceeds the sort threshold. However, when sort_at_indexing
+  // is enabled, sorting is done in the string-section-indexing-handler rather
+  // than in AddHit() itself, so we need to invoke SortHits() manually here.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+ lite_index->SortHits();
+ // Check that the HitBuffer is sorted.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+
+  // Add 12 more hits so that the sort threshold is exceeded again.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc3_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc4_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc4_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc4_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc4_hit3));
+
+  // Adding these hits exceeds the sort threshold. However, when
+  // sort_at_indexing is enabled, sorting is done in the
+  // string-section-indexing-handler rather than in AddHit() itself.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0); (3, 1); (4, 1)}
+ // bar: {(0, 0); (1, 0); (1, 2); (3, 2); (4, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0); (4, 0)}
+  // qux: {(0, 2); (2, 1); (4, 0)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(5));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(4));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b10));
+ EXPECT_THAT(hits1[1].document_id(), Eq(3));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[2].document_id(), Eq(2));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[3].document_id(), Eq(1));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[4].document_id(), Eq(0));
+ EXPECT_THAT(hits1[4].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+  EXPECT_THAT(hits3[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+  // Check that the HitBuffer is sorted after the query call. FetchHits should
+  // sort before performing binary search if the HitBuffer unsorted size
+  // exceeds the sort threshold, regardless of the sort_at_indexing config.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+}
+
+TEST_F(LiteIndexTest, LiteIndexIterator) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+  // At 64 bytes the unsorted tail can contain a max of 8 TermIdHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ const std::string term = "foo";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc0_section_id_mask = 0b11;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
+
+ Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc1_section_id_mask = 0b110;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+
+ std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
+ std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc1_section_id_mask));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc0_section_id_mask));
+ matched_terms_stats.clear();
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map0)));
+}
+
+TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+  // At 64 bytes the unsorted tail can contain a max of 8 TermIdHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ const std::string term = "foo";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc0_section_id_mask = 0b11;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
+
+ Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc1_section_id_mask = 0b110;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+
+ std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
+ std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc1_section_id_mask));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc0_section_id_mask));
+ matched_terms_stats.clear();
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map0)));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc
new file mode 100644
index 0000000..53aa6cd
--- /dev/null
+++ b/icing/index/lite/lite-index_thread-safety_test.cc
@@ -0,0 +1,399 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <array>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <thread>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+// These tests cover concurrent FetchHits operations, as well as interleaved
+// AddHit and FetchHits operations. Usages of the LiteIndex outside these
+// scenarios are not guaranteed to be thread-safe, as the LiteIndex is only
+// go/thread-compatible.
+class LiteIndexThreadSafetyTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ index_dir_ = GetTestTempDir() + "/test_dir";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
+
+ std::string lite_index_file_name =
+ index_dir_ + "/test_file.lite-idx-thread-safety.index";
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+ }
+
+ void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ std::string index_dir_;
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+constexpr NamespaceId kNamespace0 = 0;
+constexpr DocumentId kDocumentId0 = 0;
+constexpr DocumentId kDocumentId1 = 1;
+constexpr SectionId kSectionId0 = 1;
+constexpr SectionId kSectionId1 = 0b11;
+
+static constexpr std::array<std::string_view, 100> kCommonWords = {
+ "the", "and", "for", "that", "this", "with",
+ "you", "not", "are", "from", "your", "all",
+ "have", "new", "more", "was", "will", "home",
+ "can", "about", "page", "has", "search", "free",
+ "but", "our", "one", "other", "information", "time",
+ "they", "site", "may", "what", "which", "their",
+ "news", "out", "use", "any", "there", "see",
+ "only", "his", "when", "contact", "here", "business",
+ "who", "web", "also", "now", "help", "get",
+ "view", "online", "first", "been", "would", "how",
+ "were", "services", "some", "these", "click", "its",
+ "like", "service", "than", "find", "price", "date",
+ "back", "top", "people", "had", "list", "name",
+ "just", "over", "state", "year", "day", "into",
+ "email", "two", "health", "world", "next", "used",
+ "work", "last", "most", "products", "music", "buy",
+ "data", "make", "them", "should"};
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_singleTerm) {
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
+
+ // Create kNumThreads threads to call lite_index_->FetchHits()
+ // simultaneously. Each thread should get a valid result of 2 hits for the
+ // term 'foo', and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ };
+ // Spawn threads for FetchHits().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(
+ hits[i],
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId0}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId0})));
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_multipleTerms) {
+ // Add two hits for each of the first 50 terms in kCommonWords.
+ for (int i = 0; i < 50; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[i]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
+ }
+
+ // Create kNumThreads threads to call lite_index_->FetchHits()
+ // simultaneously. Each thread should get a valid result of 2 hits for each
+ // term, and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ lite_index_->FetchHits(
+ term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ };
+
+ // Spawn threads for FetchHits().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(
+ hits[i],
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId0}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId0})));
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousAddHitAndFetchHits_singleTerm) {
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+
+ // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
+  // every odd-numbered thread calls AddHit.
+ // Each AddHit operation adds the term 'foo' to a new section of the same doc.
+ // Each query result should contain one hit, and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ if (thread_id % 2 == 0) {
+ // Even-numbered thread calls FetchHits.
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ Hit doc_hit(/*section_id=*/thread_id / 2, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ // All AddHit operations add 'foo' to the same document, so there should
+ // only be one DocHitInfo per run.
+ if (i % 2 == 0) {
+ EXPECT_THAT(hits[i], SizeIs(1));
+ EXPECT_THAT(hits[i].back().document_id(), Eq(0));
+ }
+ }
+
+ // After all threads have executed, hits should come from sections 0-24.
+ std::vector<DocHitInfo> final_hits;
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &final_hits);
+ EXPECT_THAT(final_hits, SizeIs(1));
+ EXPECT_THAT(final_hits.back().document_id(), Eq(0));
+ // Section mask of sections 0-24.
+ EXPECT_THAT(final_hits.back().hit_section_ids_mask(), Eq((1 << 25) - 1));
+}
+
+TEST_F(LiteIndexThreadSafetyTest,
+ SimultaneousAddHitAndFetchHits_multipleTerms) {
+ // Add the initial hit 'foo'.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+
+ // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
+  // every odd-numbered thread calls AddHit.
+ // Each AddHit operation adds a different term to a new doc.
+ // Queries always search for the term 'foo' added above so there will always
+ // be a hit. There should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ // Create new tvi and term_id for new term kCommonWords[thread_id].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ if (thread_id % 2 == 0) {
+ // Even-numbered thread calls FetchHits.
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ // AddHit to section 0 of a new doc.
+ Hit doc_hit(/*section_id=*/kSectionId0, /*document_id=*/thread_id / 2,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+  // Join threads and verify results. Queries always search for the term 'foo'
+  // so there will always be a hit.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (i % 2 == 0) {
+ EXPECT_THAT(hits[i],
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId0})));
+ }
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, ManyAddHitAndOneFetchHits_multipleTerms) {
+ // Add two hits for each of the first 20 terms in kCommonWords.
+ for (int i = 0; i < 20; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[i]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId1, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
+ }
+
+  // Create kNumThreads threads. Call one FetchHits operation after every 5
+  // AddHit operations.
+  // Each AddHit operation adds a hit for one of the terms created above into a
+  // new section of doc 0.
+  // Each FetchHits operation queries one of those terms, so there will always
+  // be a hit. There should be no crash.
+ constexpr int kNumThreads = 100;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ // Create new tvi and term_id for new term kCommonWords[thread_id].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id / 5]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ if (thread_id % 5 == 0) {
+ // Call FetchHits on term kCommonWords[thread_id / 5]
+ lite_index_->FetchHits(
+ term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+      // Threads whose id is not divisible by 5 call AddHit.
+ // AddHit to section (thread_id % 5 + 1) of doc 0.
+ Hit doc_hit(/*section_id=*/thread_id % 5 + 1,
+ /*document_id=*/kDocumentId0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
+ }
+ };
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify FetchHits results.
+  // Every query should see hits in doc 0 sections kSectionId0 and kSectionId1.
+  // Additional hits might also be found in sections 2-5 depending on thread
+  // execution order.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (i % 5 == 0) {
+ EXPECT_THAT(hits[i], SizeIs(1));
+ EXPECT_THAT(hits[i].back().document_id(), Eq(0));
+ EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Ge(0b11));
+ EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Le(0b1111111));
+ }
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
new file mode 100644
index 0000000..82bd010
--- /dev/null
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -0,0 +1,85 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_TERM_ID_HIT_PAIR_H_
+#define ICING_INDEX_TERM_ID_HIT_PAIR_H_
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/index/hit/hit.h"
+#include "icing/util/bit-util.h"
+
+namespace icing {
+namespace lib {
+
+class TermIdHitPair {
+ public:
+ // Layout bits: 24 termid + 32 hit value + 8 hit term frequency.
+ using Value = uint64_t;
+
+ static constexpr int kTermIdBits = 24;
+ static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
+ static constexpr int kHitTermFrequencyBits = sizeof(Hit::TermFrequency) * 8;
+
+ static const Value kInvalidValue;
+
+ explicit TermIdHitPair(Value v = kInvalidValue) : value_(v) {}
+
+ TermIdHitPair(uint32_t term_id, const Hit& hit) {
+ static_assert(kTermIdBits + kHitValueBits + kHitTermFrequencyBits <=
+ sizeof(Value) * 8,
+ "TermIdHitPairTooBig");
+
+ value_ = 0;
+    // Term id goes into the most significant bits because it takes
+    // precedence in sorts.
+ bit_util::BitfieldSet(term_id, kHitValueBits + kHitTermFrequencyBits,
+ kTermIdBits, &value_);
+ bit_util::BitfieldSet(hit.value(), kHitTermFrequencyBits, kHitValueBits,
+ &value_);
+ bit_util::BitfieldSet(hit.term_frequency(), 0, kHitTermFrequencyBits,
+ &value_);
+ }
+
+ uint32_t term_id() const {
+ return bit_util::BitfieldGet(value_, kHitValueBits + kHitTermFrequencyBits,
+ kTermIdBits);
+ }
+
+ Hit hit() const {
+ return Hit(
+ bit_util::BitfieldGet(value_, kHitTermFrequencyBits, kHitValueBits),
+ bit_util::BitfieldGet(value_, 0, kHitTermFrequencyBits));
+ }
+
+ Value value() const { return value_; }
+
+ bool operator==(const TermIdHitPair& rhs) const {
+ return value_ == rhs.value_;
+ }
+
+ bool operator<(const TermIdHitPair& rhs) const { return value_ < rhs.value_; }
+
+ private:
+ Value value_;
+};
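+
+// Illustrative round-trip sketch (comment-only; the values are assumptions
+// chosen for the example, not taken from this header):
+//
+//   Hit hit(/*section_id=*/1, /*document_id=*/7, /*term_frequency=*/3,
+//           /*is_in_prefix_section=*/false);
+//   TermIdHitPair pair(/*term_id=*/42, hit);
+//   // pair.term_id() == 42 and pair.hit().term_frequency() == 3. Comparing
+//   // two pairs with operator< orders first by term id, then by hit value,
+//   // because the term id occupies the most significant bits.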
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_TERM_ID_HIT_PAIR_H_
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
new file mode 100644
index 0000000..3e66858
--- /dev/null
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -0,0 +1,218 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/doc-hit-info-iterator-term-main.h"
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/main/main-index.h"
+#include "icing/index/main/posting-list-hit-accessor.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
+ std::string mask(kTotalNumSections, '0');
+ for (SectionId i = kMaxSectionId; i >= 0; --i) {
+ if (section_id_mask & (UINT64_C(1) << i)) {
+ mask[kMaxSectionId - i] = '1';
+ }
+ }
+ return mask;
+}
+
+void MergeNewHitIntoCachedDocHitInfos(
+ const Hit& hit, bool need_hit_term_frequency,
+ std::vector<DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray>&
+ cached_doc_hit_infos_out) {
+ if (cached_doc_hit_infos_out.empty() ||
+ hit.document_id() !=
+ cached_doc_hit_infos_out.back().doc_hit_info.document_id()) {
+ std::optional<Hit::TermFrequencyArray> tf_arr;
+ if (need_hit_term_frequency) {
+ tf_arr = std::make_optional<Hit::TermFrequencyArray>();
+ }
+
+ cached_doc_hit_infos_out.push_back(
+ DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray(
+ DocHitInfo(hit.document_id()), std::move(tf_arr)));
+ }
+
+ cached_doc_hit_infos_out.back().doc_hit_info.UpdateSection(hit.section_id());
+ if (need_hit_term_frequency) {
+ (*cached_doc_hit_infos_out.back().term_frequency_array)[hit.section_id()] =
+ hit.term_frequency();
+ }
+}
+
+} // namespace
+
+libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
+ ++cached_doc_hit_infos_idx_;
+ while (posting_list_accessor_ == nullptr ||
+ (!all_pages_consumed_ && cached_doc_hit_info_count() == 1)) {
+ // If we haven't retrieved any hits before or we've already returned all but
+ // the last cached hit, then go get some more!
+ // We hold back the last cached hit because it could have more hits on the
+ // next posting list in the chain.
+ libtextclassifier3::Status status = RetrieveMoreHits();
+ if (!status.ok()) {
+ if (!absl_ports::IsNotFound(status)) {
+ // NOT_FOUND is expected to happen (not every term will be in the main
+ // index!). Other errors are worth logging.
+ ICING_LOG(ERROR)
+ << "Encountered unexpected failure while retrieving hits "
+ << status.error_message();
+ }
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ }
+ if (cached_doc_hit_infos_idx_ == -1 ||
+ cached_doc_hit_infos_idx_ >= cached_doc_hit_infos_.size()) {
+ // Nothing more for the iterator to return. Set these members to invalid
+ // values.
+ doc_hit_info_ = DocHitInfo();
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ ++num_advance_calls_;
+ doc_hit_info_ =
+ cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_).doc_hit_info;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorTermMain::TrimRightMostNode() && {
+ // Leaf iterator should trim itself.
+ DocHitInfoIterator::TrimmedNode node = {nullptr, term_, term_start_index_,
+ unnormalized_term_length_};
+ return node;
+}
+
+libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
+ if (!cached_doc_hit_infos_.empty()) {
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
+ }
+ cached_doc_hit_infos_idx_ = 0;
+ cached_doc_hit_infos_.clear();
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
+    // Carry over the last hit. It might need to be merged with the first hit
+    // of the next posting list in the chain.
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
+ }
+ if (posting_list_accessor_ == nullptr) {
+ ICING_ASSIGN_OR_RETURN(posting_list_accessor_,
+ main_index_->GetAccessorForExactTerm(term_));
+ }
+
+ ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
+ posting_list_accessor_->GetNextHitsBatch());
+ if (hits.empty()) {
+ all_pages_consumed_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ ++num_blocks_inspected_;
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
+ for (const Hit& hit : hits) {
+ // Check sections.
+ if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
+ continue;
+ }
+ // We want exact hits, skip prefix-only hits.
+ if (hit.is_prefix_hit()) {
+ continue;
+ }
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+std::string DocHitInfoIteratorTermMainExact::ToString() const {
+ return absl_ports::StrCat(SectionIdMaskToString(section_restrict_mask_), ":",
+ term_);
+}
+
+libtextclassifier3::Status
+DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
+ if (!cached_doc_hit_infos_.empty()) {
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
+ }
+ cached_doc_hit_infos_idx_ = 0;
+ cached_doc_hit_infos_.clear();
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
+    // Carry over the last hit. It might need to be merged with the first hit
+    // of the next posting list in the chain.
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
+ }
+
+ if (posting_list_accessor_ == nullptr) {
+ ICING_ASSIGN_OR_RETURN(MainIndex::GetPrefixAccessorResult result,
+ main_index_->GetAccessorForPrefixTerm(term_));
+ posting_list_accessor_ = std::move(result.accessor);
+ exact_ = result.exact;
+ }
+ ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
+ posting_list_accessor_->GetNextHitsBatch());
+ if (hits.empty()) {
+ all_pages_consumed_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ ++num_blocks_inspected_;
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
+ for (const Hit& hit : hits) {
+ // Check sections.
+ if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
+ continue;
+ }
+    // Only keep hits from prefix sections unless the term matched exactly.
+ if (!exact_ && !hit.is_in_prefix_section()) {
+ continue;
+ }
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+std::string DocHitInfoIteratorTermMainPrefix::ToString() const {
+ return absl_ports::StrCat(SectionIdMaskToString(section_restrict_mask_), ":",
+ term_, "*");
+}
+
+} // namespace lib
+} // namespace icing
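
MergeNewHitIntoCachedDocHitInfos above depends on the posting list yielding
hits ordered by document id, so consecutive hits for one document fold into a
single entry. A stripped-down sketch of that grouping step, using simplified
stand-in types rather than the real DocHitInfo and Hit:

#include <cstdint>
#include <vector>

struct SimpleHit {
  int document_id;
  int section_id;  // assumed 0..63 so it fits a 64-bit mask
};

struct SimpleDocHitInfo {
  int document_id;
  uint64_t section_mask = 0;
};

// Folds a document-id-ordered stream of hits into one entry per document,
// OR-ing each hit's section into that entry's section mask.
std::vector<SimpleDocHitInfo> GroupByDocument(
    const std::vector<SimpleHit>& hits) {
  std::vector<SimpleDocHitInfo> out;
  for (const SimpleHit& hit : hits) {
    if (out.empty() || out.back().document_id != hit.document_id) {
      out.push_back({hit.document_id});  // new document: start a new entry
    }
    out.back().section_mask |= uint64_t{1} << hit.section_id;
  }
  return out;
}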
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
new file mode 100644
index 0000000..e32db2a
--- /dev/null
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -0,0 +1,204 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_MAIN_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_MAIN_H_
+
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/main/main-index.h"
+#include "icing/index/main/posting-list-hit-accessor.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+class DocHitInfoIteratorTermMain : public DocHitInfoLeafIterator {
+ public:
+ struct DocHitInfoAndTermFrequencyArray {
+ DocHitInfo doc_hit_info;
+ std::optional<Hit::TermFrequencyArray> term_frequency_array;
+
+ explicit DocHitInfoAndTermFrequencyArray() = default;
+
+ explicit DocHitInfoAndTermFrequencyArray(
+ DocHitInfo doc_hit_info_in,
+ std::optional<Hit::TermFrequencyArray> term_frequency_array_in)
+ : doc_hit_info(std::move(doc_hit_info_in)),
+ term_frequency_array(std::move(term_frequency_array_in)) {}
+ };
+
+ explicit DocHitInfoIteratorTermMain(MainIndex* main_index,
+ const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
+ SectionIdMask section_restrict_mask,
+ bool need_hit_term_frequency)
+ : term_(term),
+ term_start_index_(term_start_index),
+ unnormalized_term_length_(unnormalized_term_length),
+ posting_list_accessor_(nullptr),
+ main_index_(main_index),
+ cached_doc_hit_infos_idx_(-1),
+ num_advance_calls_(0),
+ num_blocks_inspected_(0),
+ all_pages_consumed_(false),
+ section_restrict_mask_(section_restrict_mask),
+ need_hit_term_frequency_(need_hit_term_frequency) {}
+
+ libtextclassifier3::Status Advance() override;
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+ CallStats GetCallStats() const override {
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/num_advance_calls_,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/num_blocks_inspected_);
+ }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (cached_doc_hit_infos_idx_ == -1 ||
+ cached_doc_hit_infos_idx_ >= cached_doc_hit_infos_.size()) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
+ std::array<Hit::TermFrequency, kTotalNumSections> section_term_frequencies =
+ {Hit::kNoTermFrequency};
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctzll(section_mask_copy);
+ if (need_hit_term_frequency_) {
+ section_term_frequencies.at(section_id) =
+ (*cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_)
+ .term_frequency_array)[section_id];
+ }
+ section_mask_copy &= ~(UINT64_C(1) << section_id);
+ }
+ TermMatchInfo term_stats(term_, section_mask,
+ std::move(section_term_frequencies));
+
+ for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
+ if (cur_term_stats.term == term_stats.term) {
+        // Same doc id and same term: the term was already added, and the
+        // term frequency should always be the same, so there is nothing to do.
+ return;
+ }
+ }
+ matched_terms_stats->push_back(std::move(term_stats));
+ }
+
+ protected:
+ // Add DocHitInfos corresponding to term_ to cached_doc_hit_infos_.
+ virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
+
+ const std::string term_;
+
+ // The start index of the given term in the search query
+ int term_start_index_;
+ // The length of the given unnormalized term in the search query
+ int unnormalized_term_length_;
+ // The accessor of the posting list chain for the requested term.
+ std::unique_ptr<PostingListHitAccessor> posting_list_accessor_;
+
+ MainIndex* main_index_;
+ // Stores hits and optional term frequency arrays retrieved from the index.
+ // This may only be a subset of the hits that are present in the index.
+ // Current value pointed to by the Iterator is tracked by
+ // cached_doc_hit_infos_idx_.
+ std::vector<DocHitInfoAndTermFrequencyArray> cached_doc_hit_infos_;
+ int cached_doc_hit_infos_idx_;
+
+ int num_advance_calls_;
+ int num_blocks_inspected_;
+ bool all_pages_consumed_;
+ // Mask indicating which sections hits should be considered for.
+ // Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired.
+ const SectionIdMask section_restrict_mask_;
+ const bool need_hit_term_frequency_;
+
+ private:
+ // Remaining number of hits including the current hit.
+ // Returns -1 if cached_doc_hit_infos_idx_ is invalid.
+ int cached_doc_hit_info_count() const {
+ if (cached_doc_hit_infos_idx_ == -1 ||
+ cached_doc_hit_infos_idx_ >= cached_doc_hit_infos_.size()) {
+ return -1;
+ }
+ return cached_doc_hit_infos_.size() - cached_doc_hit_infos_idx_;
+ }
+};
+
+class DocHitInfoIteratorTermMainExact : public DocHitInfoIteratorTermMain {
+ public:
+ explicit DocHitInfoIteratorTermMainExact(MainIndex* main_index,
+ const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
+ SectionIdMask section_restrict_mask,
+ bool need_hit_term_frequency)
+ : DocHitInfoIteratorTermMain(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_restrict_mask, need_hit_term_frequency) {}
+
+ std::string ToString() const override;
+
+ protected:
+ libtextclassifier3::Status RetrieveMoreHits() override;
+};
+
+class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain {
+ public:
+ explicit DocHitInfoIteratorTermMainPrefix(MainIndex* main_index,
+ const std::string& term,
+ int term_start_index,
+ int unnormalized_term_length,
+ SectionIdMask section_restrict_mask,
+ bool need_hit_term_frequency)
+ : DocHitInfoIteratorTermMain(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_restrict_mask, need_hit_term_frequency) {}
+
+ std::string ToString() const override;
+
+ protected:
+ libtextclassifier3::Status RetrieveMoreHits() override;
+
+ private:
+ // Whether or not posting_list_accessor_ holds a posting list chain for
+ // 'term' or for a term for which 'term' is a prefix. This is necessary to
+ // determine whether to return hits that are not from a prefix section (hits
+ // not from a prefix section should only be returned if exact_ is true).
+ bool exact_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_MAIN_H_
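
The section_restrict_mask_ convention documented above (bit N set means hits
from section N are desired) can be exercised in isolation. A small sketch,
assuming SectionIdMask is the 64-bit mask the UINT64_C shifts imply;
MakeRestrictMask is an illustrative helper, not an icing API:

#include <cassert>
#include <cstdint>
#include <initializer_list>

using SectionIdMask = uint64_t;

// Builds a mask that admits only the listed sections.
SectionIdMask MakeRestrictMask(std::initializer_list<int> section_ids) {
  SectionIdMask mask = 0;
  for (int id : section_ids) {
    mask |= UINT64_C(1) << id;
  }
  return mask;
}

int main() {
  // Only hits from sections 1 and 3 are desired.
  SectionIdMask mask = MakeRestrictMask({1, 3});
  // The same test RetrieveMoreHits applies to each hit before caching it.
  assert(((UINT64_C(1) << 1) & mask) != 0);  // section 1 passes
  assert(((UINT64_C(1) << 2) & mask) == 0);  // section 2 is filtered out
  return 0;
}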
diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc
new file mode 100644
index 0000000..c26a6d7
--- /dev/null
+++ b/icing/index/main/main-index-merger.cc
@@ -0,0 +1,305 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/main-index-merger.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <unordered_map>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class HitSelector {
+ public:
+ // Returns whether or not term_id_hit_pair has the same term_id, document_id
+ // and section_id as the previously selected hits.
+ bool IsEquivalentHit(const TermIdHitPair& term_id_hit_pair) {
+ return prev_.term_id() == term_id_hit_pair.term_id() &&
+ prev_.hit().document_id() == term_id_hit_pair.hit().document_id() &&
+ prev_.hit().section_id() == term_id_hit_pair.hit().section_id();
+ }
+
+ // Merges term_id_hit_pair with previously added hits.
+ void SelectIfBetter(const TermIdHitPair& term_id_hit_pair) {
+ if (term_id_hit_pair.hit().is_prefix_hit()) {
+ SelectPrefixHitIfBetter(term_id_hit_pair);
+ } else {
+ SelectExactHitIfBetter(term_id_hit_pair);
+ }
+ prev_ = term_id_hit_pair;
+ }
+
+ // Adds all valid, selected hits to hits starting at position pos in hits.
+ // Returns the offset in hits after the position of the last added hit.
+  // This function may add zero, one or two hits, depending on whether the
+  // HitSelector holds a valid exact hit, a valid prefix hit, both, or neither.
+ size_t InsertSelectedHits(size_t pos, std::vector<TermIdHitPair>* hits) {
+ // Given the prefix/exact hits for a given term+docid+sectionid, push needed
+ // hits into hits array at offset pos. Return new pos.
+ if (best_prefix_hit_.hit().is_valid() && best_exact_hit_.hit().is_valid()) {
+ (*hits)[pos++] = best_exact_hit_;
+ const Hit& prefix_hit = best_prefix_hit_.hit();
+      // The prefix hit's term frequency equals the sum of the two term
+      // frequencies, capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ prefix_hit.term_frequency() + best_exact_hit_.hit().term_frequency());
+ best_prefix_hit_ = TermIdHitPair(
+ best_prefix_hit_.term_id(),
+ Hit(prefix_hit.section_id(), prefix_hit.document_id(),
+ final_term_frequency, prefix_hit.is_in_prefix_section(),
+ prefix_hit.is_prefix_hit()));
+ (*hits)[pos++] = best_prefix_hit_;
+ // Ensure sorted.
+ if (best_prefix_hit_.hit() < best_exact_hit_.hit()) {
+ std::swap((*hits)[pos - 1], (*hits)[pos - 2]);
+ }
+ } else if (best_prefix_hit_.hit().is_valid()) {
+ (*hits)[pos++] = best_prefix_hit_;
+ } else if (best_exact_hit_.hit().is_valid()) {
+ (*hits)[pos++] = best_exact_hit_;
+ }
+
+ return pos;
+ }
+
+ void Reset() {
+ best_prefix_hit_ = TermIdHitPair();
+ best_exact_hit_ = TermIdHitPair();
+ prev_ = TermIdHitPair();
+ }
+
+ private:
+ void SelectPrefixHitIfBetter(const TermIdHitPair& term_id_hit_pair) {
+ if (!best_prefix_hit_.hit().is_valid()) {
+ best_prefix_hit_ = term_id_hit_pair;
+ } else {
+ const Hit& hit = term_id_hit_pair.hit();
+ // Create a new prefix hit with term_frequency as the sum of the term
+ // frequencies. The term frequency is capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ hit.term_frequency() + best_prefix_hit_.hit().term_frequency());
+ best_prefix_hit_ = TermIdHitPair(
+ term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_term_frequency,
+ best_prefix_hit_.hit().is_in_prefix_section(),
+ best_prefix_hit_.hit().is_prefix_hit()));
+ }
+ }
+
+ void SelectExactHitIfBetter(const TermIdHitPair& term_id_hit_pair) {
+ if (!best_exact_hit_.hit().is_valid()) {
+ best_exact_hit_ = term_id_hit_pair;
+ } else {
+ const Hit& hit = term_id_hit_pair.hit();
+      // Create a new exact hit with term_frequency as the sum of the term
+      // frequencies. The term frequency is capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ hit.term_frequency() + best_exact_hit_.hit().term_frequency());
+ best_exact_hit_ = TermIdHitPair(
+ term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_term_frequency,
+ best_exact_hit_.hit().is_in_prefix_section(),
+ best_exact_hit_.hit().is_prefix_hit()));
+ }
+ }
+
+ TermIdHitPair best_prefix_hit_;
+ TermIdHitPair best_exact_hit_;
+ TermIdHitPair prev_;
+};
+
+class HitComparator {
+ public:
+ explicit HitComparator(
+ const TermIdCodec& term_id_codec,
+ const std::unordered_map<uint32_t, int>& main_tvi_to_block_index)
+ : term_id_codec_(&term_id_codec),
+ main_tvi_to_block_index_(&main_tvi_to_block_index) {}
+
+ bool operator()(const TermIdHitPair& lhs, const TermIdHitPair& rhs) const {
+    // Primary sort by index block. This achieves two things:
+ // 1. It reduces the number of flash writes by grouping together new hits
+ // for terms whose posting lists might share the same index block.
+ // 2. More importantly, this ensures that newly added backfill branch points
+ // will be populated first (because all newly added terms have an invalid
+ // block index of 0) before any new hits are added to the postings lists
+ // that they backfill from.
+ int lhs_index_block = GetIndexBlock(lhs.term_id());
+ int rhs_index_block = GetIndexBlock(rhs.term_id());
+ if (lhs_index_block == rhs_index_block) {
+ // Secondary sort by term_id and hit.
+ return lhs.value() < rhs.value();
+ }
+ return lhs_index_block < rhs_index_block;
+ }
+
+ private:
+ int GetIndexBlock(uint32_t term_id) const {
+ auto term_info_or = term_id_codec_->DecodeTermInfo(term_id);
+ if (!term_info_or.ok()) {
+ ICING_LOG(WARNING)
+ << "Unable to decode term-info during merge. This shouldn't happen.";
+ return kInvalidBlockIndex;
+ }
+ TermIdCodec::DecodedTermInfo term_info =
+ std::move(term_info_or).ValueOrDie();
+ auto itr = main_tvi_to_block_index_->find(term_info.tvi);
+ if (itr == main_tvi_to_block_index_->end()) {
+ return kInvalidBlockIndex;
+ }
+ return itr->second;
+ }
+
+ const TermIdCodec* term_id_codec_;
+ const std::unordered_map<uint32_t, int>* main_tvi_to_block_index_;
+};
+
+// A helper function to dedupe hits stored in hits. Suppose that the lite index
+// contained a single document with two hits in a single prefix section: "foot"
+// and "fool". When expanded, there would be four hits:
+// {"fo", docid0, sectionid0}
+// {"fo", docid0, sectionid0}
+// {"foot", docid0, sectionid0}
+// {"fool", docid0, sectionid0}
+//
+// The first two are duplicates of each other. So, this function will dedupe
+// and shrink hits to be:
+// {"fo", docid0, sectionid0}
+// {"foot", docid0, sectionid0}
+// {"fool", docid0, sectionid0}
+//
+// When two or more prefix hits are duplicates, merge into one hit with term
+// frequency as the sum of the term frequencies. If there is both an exact and
+// prefix hit for the same term, keep the exact hit as is and update the prefix
+// hit so that its term frequency is the sum of the term frequencies.
+void DedupeHits(
+ std::vector<TermIdHitPair>* hits, const TermIdCodec& term_id_codec,
+ const std::unordered_map<uint32_t, int>& main_tvi_to_block_index) {
+  // Sort so that hits are grouped by index block and, within a block, ordered
+  // by term and hit. Then merge equivalent hits into one.
+ std::sort(hits->begin(), hits->end(),
+ HitComparator(term_id_codec, main_tvi_to_block_index));
+ size_t current_offset = 0;
+ HitSelector hit_selector;
+ for (const TermIdHitPair& term_id_hit_pair : *hits) {
+ if (!hit_selector.IsEquivalentHit(term_id_hit_pair)) {
+ // We've reached a new hit. Insert the previously selected hits that we
+ // had accumulated and reset to add this new hit.
+ current_offset = hit_selector.InsertSelectedHits(current_offset, hits);
+ hit_selector.Reset();
+ }
+ // Update best exact and prefix hit.
+ hit_selector.SelectIfBetter(term_id_hit_pair);
+ }
+
+ // Push last.
+ current_offset = hit_selector.InsertSelectedHits(current_offset, hits);
+
+ hits->resize(current_offset);
+}
+
+// Based on experiments with full prefix expansion, the multiplier
+// is ~4x.
+constexpr int kAvgPrefixesPerTerm = 4;
+
+} // namespace
+
+libtextclassifier3::StatusOr<std::vector<TermIdHitPair>>
+MainIndexMerger::TranslateAndExpandLiteHits(
+ const LiteIndex& lite_index, const TermIdCodec& term_id_codec,
+ const MainIndex::LexiconMergeOutputs& lexicon_merge_outputs) {
+ std::vector<TermIdHitPair> hits;
+ if (lite_index.empty()) {
+ return hits;
+ }
+ // Reserve enough space for the average number of prefixes per term and the
+ // terms themselves.
+ hits.reserve(lite_index.size() * (kAvgPrefixesPerTerm + 1));
+
+ // Translate lite tvis to main tvis.
+ for (const TermIdHitPair& term_id_hit_pair : lite_index) {
+ uint32_t cur_term_id = term_id_hit_pair.term_id();
+ ICING_ASSIGN_OR_RETURN(TermIdCodec::DecodedTermInfo cur_decoded_term,
+ term_id_codec.DecodeTermInfo(cur_term_id));
+ Hit hit(term_id_hit_pair.hit());
+
+ // 1. Translate and push original.
+ auto itr =
+ lexicon_merge_outputs.other_tvi_to_main_tvi.find(cur_decoded_term.tvi);
+ if (itr == lexicon_merge_outputs.other_tvi_to_main_tvi.cend()) {
+ // b/37273773
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Trying to translate lite tvi %u that was never added to the lexicon",
+ cur_decoded_term.tvi));
+ }
+ ICING_ASSIGN_OR_RETURN(uint32_t term_id,
+ term_id_codec.EncodeTvi(itr->second, TviType::MAIN));
+ hits.emplace_back(term_id, hit);
+
+ // 2. Expand hits in prefix sections.
+ if (hit.is_in_prefix_section()) {
+ // Hit was in a prefix section. Push prefixes. Turn on prefix bit.
+ auto itr_prefixes =
+ lexicon_merge_outputs.other_tvi_to_prefix_main_tvis.find(
+ cur_decoded_term.tvi);
+ if (itr_prefixes ==
+ lexicon_merge_outputs.other_tvi_to_prefix_main_tvis.end()) {
+ ICING_VLOG(1) << "No necessary prefix expansion for "
+ << cur_decoded_term.tvi;
+ continue;
+ }
+ // The tvis of all prefixes of this hit's term that appear in the main
+ // lexicon are between [prefix_tvis_buf[offset],
+ // prefix_tvis_buf[offset+len]).
+ size_t offset = itr_prefixes->second.first;
+ size_t len = itr_prefixes->second.second;
+ size_t offset_end_exclusive = offset + len;
+ Hit prefix_hit(hit.section_id(), hit.document_id(), hit.term_frequency(),
+ /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+ for (; offset < offset_end_exclusive; ++offset) {
+ // Take the tvi (in the main lexicon) of each prefix term.
+ uint32_t prefix_main_tvi =
+ lexicon_merge_outputs.prefix_tvis_buf[offset];
+ // Convert it to a term_id.
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t prefix_term_id,
+ term_id_codec.EncodeTvi(prefix_main_tvi, TviType::MAIN));
+        // Add an element for this prefix TermId and prefix Hit to hits.
+ hits.emplace_back(prefix_term_id, prefix_hit);
+ }
+ }
+ }
+ // 3. Remove any duplicate hits.
+ DedupeHits(&hits, term_id_codec,
+ lexicon_merge_outputs.main_tvi_to_block_index);
+ return hits;
+}
+
+} // namespace lib
+} // namespace icing
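
HitSelector widens term frequencies to int before summing because adding two
uint8_t values directly would wrap around. A standalone sketch of that
arithmetic; kMaxTermFrequency here assumes Hit::kMaxTermFrequency is 255,
consistent with the 8-bit term frequency field:

#include <algorithm>
#include <cassert>
#include <cstdint>

using TermFrequency = uint8_t;
constexpr int kMaxTermFrequency = 255;  // assumption: Hit::kMaxTermFrequency

// Sums two term frequencies the way HitSelector does: widen to int first,
// then clamp to the maximum before narrowing back to 8 bits.
TermFrequency CappedSum(TermFrequency a, TermFrequency b) {
  return static_cast<TermFrequency>(
      std::min(kMaxTermFrequency, static_cast<int>(a) + static_cast<int>(b)));
}

int main() {
  assert(CappedSum(57, 1) == 58);    // exact + prefix case from the tests
  assert(CappedSum(57, 57) == 114);  // equal-frequency case
  assert(CappedSum(255, 1) == 255);  // capped instead of wrapping to 0
  return 0;
}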
diff --git a/icing/index/main/main-index-merger.h b/icing/index/main/main-index-merger.h
new file mode 100644
index 0000000..1413a8f
--- /dev/null
+++ b/icing/index/main/main-index-merger.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_MAIN_MAIN_INDEX_MERGER_H_
+#define ICING_INDEX_MAIN_MAIN_INDEX_MERGER_H_
+
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/main/main-index.h"
+#include "icing/index/term-id-codec.h"
+
+namespace icing {
+namespace lib {
+
+// Class used to merge hits from the lite_index and lite_lexicon into main_index
+// and main_lexicon.
+class MainIndexMerger {
+ public:
+ // Retrieves all hits in the lite index, translates the term ids of each
+ // LiteIndex::Element and expands prefix hits based on the mapping from
+ // lexicon_merge_outputs.other_tvi_to_prefix_main_tvis.
+ //
+ // RETURNS:
+ // - OK on success
+  //   - INVALID_ARGUMENT if one of the elements in the lite index has a
+  //     term_id that exceeds the max TermId
+  //   - INTERNAL if a lite term id was never added to the main lexicon and
+  //     therefore cannot be translated
+ static libtextclassifier3::StatusOr<std::vector<TermIdHitPair>>
+ TranslateAndExpandLiteHits(
+ const LiteIndex& lite_index, const TermIdCodec& term_id_codec,
+ const MainIndex::LexiconMergeOutputs& lexicon_merge_outputs);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_MAIN_MAIN_INDEX_MERGER_H_
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
new file mode 100644
index 0000000..37e14fc
--- /dev/null
+++ b/icing/index/main/main-index-merger_test.cc
@@ -0,0 +1,382 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/index/main/main-index-merger.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/main/doc-hit-info-iterator-term-main.h"
+#include "icing/index/main/main-index-merger.h"
+#include "icing/index/main/main-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/index/term-property-id.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/schema/section.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::UnorderedElementsAre;
+
+class MainIndexMergerTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ index_dir_ = GetTestTempDir() + "/test_dir";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
+
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+ }
+
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ std::string index_dir_;
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+constexpr NamespaceId kNamespace0 = 0;
+
+TEST_F(MainIndexMergerTest, TranslateTermNotAdded) {
+ // 1. Index two docs in the Lite Index:
+ // - Doc0 {"foot" is_in_prefix_section=FALSE}
+ // - Doc1 {"fool", is_in_prefix_section=FALSE}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(foot_tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_tvi,
+ lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
+
+ // 2. Build up a fake LexiconMergeOutputs
+ // This is some made up number that doesn't matter for this test.
+ uint32_t foot_main_tvi = 5;
+
+ // Only create a mapping for 'foot'. Leave out the mapping for 'fool'
+ MainIndex::LexiconMergeOutputs lexicon_outputs;
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foot_tvi, foot_main_tvi);
+
+ // 3. TranslateAndExpand should fail because 'fool' doesn't have a main tvi
+ // mapping.
+ ASSERT_THAT(MainIndexMerger::TranslateAndExpandLiteHits(
+ *lite_index_, *term_id_codec_, lexicon_outputs),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(MainIndexMergerTest, PrefixExpansion) {
+ // 1. Index two docs in the Lite Index:
+ // - Doc0 {"foot" is_in_prefix_section=FALSE}
+ // - Doc1 {"fool", is_in_prefix_section=TRUE}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(foot_tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_tvi,
+ lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
+
+ // 2. Build up a fake LexiconMergeOutputs
+ // This is some made up number that doesn't matter for this test.
+ uint32_t foo_main_tvi = 12;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
+ Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1,
+ Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+
+ uint32_t foot_main_tvi = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_main_term_id,
+ term_id_codec_->EncodeTvi(foot_main_tvi, TviType::MAIN));
+ uint32_t fool_main_tvi = 10;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_main_term_id,
+ term_id_codec_->EncodeTvi(fool_main_tvi, TviType::MAIN));
+
+ MainIndex::LexiconMergeOutputs lexicon_outputs;
+ // Map "fool" to it's prefix hit for "foo".
+ lexicon_outputs.other_tvi_to_prefix_main_tvis.emplace(fool_tvi,
+ std::make_pair(0, 1));
+ lexicon_outputs.prefix_tvis_buf.push_back(foo_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foot_tvi, foot_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(fool_tvi, fool_main_tvi);
+
+  // 3. TranslateAndExpand should:
+ // a. Translate lite term ids to main term ids based on the map
+ // b. Expand 'fool' to have a hit for 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
+ lexicon_outputs));
+ EXPECT_THAT(
+ expanded_term_id_hit_pairs,
+ UnorderedElementsAre(TermIdHitPair(foot_main_term_id, doc0_hit),
+ TermIdHitPair(fool_main_term_id, doc1_hit),
+ TermIdHitPair(foo_term_id, doc1_prefix_hit)));
+}
+
+TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentTermFrequencies) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"foot" "foo" is_in_prefix_section=TRUE}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(foot_tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
+
+ // 2. Build up a fake LexiconMergeOutputs
+ // This is some made up number that doesn't matter for this test.
+ uint32_t foo_main_tvi = 12;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_main_term_id,
+ term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
+  // The prefix hit expanded from 'foot' is merged with the exact hit for
+  // 'foo', so its final term frequency is the sum 57 + 1 = 58.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/58,
+ /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+
+ uint32_t foot_main_tvi = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_main_term_id,
+ term_id_codec_->EncodeTvi(foot_main_tvi, TviType::MAIN));
+
+ MainIndex::LexiconMergeOutputs lexicon_outputs;
+ // Map "foot" to it's prefix hit for "foo".
+ lexicon_outputs.other_tvi_to_prefix_main_tvis.emplace(foot_tvi,
+ std::make_pair(0, 1));
+ lexicon_outputs.prefix_tvis_buf.push_back(foo_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foot_tvi, foot_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foo_tvi, foo_main_tvi);
+
+  // 3. TranslateAndExpand should:
+ // a. Translate lite term ids to main term ids based on the map
+ // b. Expand 'foot' to have a hit for 'foo'
+ // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
+ // latter with term frequency as the sum of the term frequencies.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
+ lexicon_outputs));
+ EXPECT_THAT(
+ expanded_term_id_hit_pairs,
+ UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
+ TermIdHitPair(foo_main_term_id, foo_doc0_hit),
+ TermIdHitPair(foo_main_term_id, doc0_prefix_hit)));
+}
+
+TEST_F(MainIndexMergerTest, DedupeWithExactSameTermFrequencies) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"foot" "foo" is_in_prefix_section=TRUE}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(foot_tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
+  // The prefix hit should take the sum of the term frequencies: 57 + 57 = 114.
+ Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/114,
+ /*is_in_prefix_section=*/true,
+ /*is_prefix_hit=*/true);
+
+ // 2. Build up a fake LexiconMergeOutputs
+ // This is some made up number that doesn't matter for this test.
+ uint32_t foo_main_tvi = 12;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_main_term_id,
+ term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
+
+ uint32_t foot_main_tvi = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_main_term_id,
+ term_id_codec_->EncodeTvi(foot_main_tvi, TviType::MAIN));
+
+ MainIndex::LexiconMergeOutputs lexicon_outputs;
+ // Map "foot" to it's prefix hit for "foo".
+ lexicon_outputs.other_tvi_to_prefix_main_tvis.emplace(foot_tvi,
+ std::make_pair(0, 1));
+ lexicon_outputs.prefix_tvis_buf.push_back(foo_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foot_tvi, foot_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foo_tvi, foo_main_tvi);
+
+  // 3. TranslateAndExpand should:
+ // a. Translate lite term ids to main term ids based on the map
+ // b. Expand 'foot' to have a hit for 'foo'
+ // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
+ // latter with term frequency as the sum of the term frequencies.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
+ lexicon_outputs));
+ EXPECT_THAT(expanded_term_id_hit_pairs,
+ UnorderedElementsAre(
+ TermIdHitPair(foot_main_term_id, foot_doc0_hit),
+ TermIdHitPair(foo_main_term_id, foo_doc0_hit),
+ TermIdHitPair(foo_main_term_id, prefix_foo_doc0_hit)));
+}
+
+TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"foot" "fool" is_in_prefix_section=TRUE}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(foot_tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_tvi,
+ lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
+
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/Hit::kMaxTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
+ Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, fool_doc0_hit));
+
+ // 2. Build up a fake LexiconMergeOutputs
+ // This is some made up number that doesn't matter for this test.
+ uint32_t foo_main_tvi = 12;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
+  // The prefix hit should take the sum of the term frequencies (255 + 1 =
+  // 256), capped at kMaxTermFrequency.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/Hit::kMaxTermFrequency,
+ /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+
+ uint32_t foot_main_tvi = 5;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foot_main_term_id,
+ term_id_codec_->EncodeTvi(foot_main_tvi, TviType::MAIN));
+ uint32_t fool_main_tvi = 10;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t fool_main_term_id,
+ term_id_codec_->EncodeTvi(fool_main_tvi, TviType::MAIN));
+
+ MainIndex::LexiconMergeOutputs lexicon_outputs;
+ // Map "fool" to it's prefix hit for "foo" and "foot" to it's prefix hit for
+ // "foo".
+ lexicon_outputs.other_tvi_to_prefix_main_tvis.emplace(fool_tvi,
+ std::make_pair(0, 1));
+ lexicon_outputs.prefix_tvis_buf.push_back(foo_main_tvi);
+ lexicon_outputs.other_tvi_to_prefix_main_tvis.emplace(foot_tvi,
+ std::make_pair(1, 1));
+ lexicon_outputs.prefix_tvis_buf.push_back(foo_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(foot_tvi, foot_main_tvi);
+ lexicon_outputs.other_tvi_to_main_tvi.emplace(fool_tvi, fool_main_tvi);
+
+  // 3. TranslateAndExpand should:
+ // a. Translate lite term ids to main term ids based on the map
+ // b. Expand 'foot' and 'fool' to have hits for 'foo'
+ // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as
+ // term frequency.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
+ lexicon_outputs));
+ EXPECT_THAT(
+ expanded_term_id_hit_pairs,
+ UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
+ TermIdHitPair(fool_main_term_id, fool_doc0_hit),
+ TermIdHitPair(foo_term_id, doc0_prefix_hit)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
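
The tests build other_tvi_to_prefix_main_tvis as a flattened multimap: each
lite tvi maps to an (offset, length) window into the shared prefix_tvis_buf
instead of owning its own vector. A sketch of reading one window back out
(PrefixTvisFor is an illustrative helper, not an icing API):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<uint32_t> PrefixTvisFor(
    uint32_t lite_tvi,
    const std::unordered_map<uint32_t, std::pair<size_t, size_t>>&
        tvi_to_window,
    const std::vector<uint32_t>& prefix_tvis_buf) {
  auto itr = tvi_to_window.find(lite_tvi);
  if (itr == tvi_to_window.end()) {
    return {};  // this term needs no prefix expansion
  }
  size_t offset = itr->second.first;
  size_t len = itr->second.second;
  // The tvis for this term's prefixes live in [offset, offset + len).
  return std::vector<uint32_t>(prefix_tvis_buf.begin() + offset,
                               prefix_tvis_buf.begin() + offset + len);
}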
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
new file mode 100644
index 0000000..aae60c6
--- /dev/null
+++ b/icing/index/main/main-index.cc
@@ -0,0 +1,858 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/index/main/main-index.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <unordered_set>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/index/term-property-id.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Finds the shortest valid prefix term with prefix hits in the lexicon for
+// which "prefix" is a prefix.
+// "prefix" is a prefix.
+// Returns a valid FindTermResult with found=true if either:
+// 1. prefix exists as a term in lexicon.
+// 2. the shortest, valid prefix in the lexicon exists and contains prefix
+// hits.
+// Returns a FindTermResult with found=false and undefined values of tvi and
+// exact if no term was found.
+struct FindTermResult {
+ // TVI of the term that was found. Undefined if found=false.
+ uint32_t tvi;
+ // Whether or not a valid term with prefix hits was found.
+ bool found;
+ // Whether or not that term is equal to 'prefix'
+ bool exact;
+};
+FindTermResult FindShortestValidTermWithPrefixHits(
+ const IcingDynamicTrie* lexicon, const std::string& prefix) {
+ // For prefix indexing: when we are doing a prefix match for "prefix", find
+ // the tvi to the equivalent posting list. prefix's own posting list might not
+ // exist but one of its children acts as a proxy.
+ IcingDynamicTrie::PropertyReader hits_in_prefix_section(
+ *lexicon, GetHasHitsInPrefixSectionPropertyId());
+ uint32_t tvi = 0;
+ bool found = false;
+ bool exact = false;
+ for (IcingDynamicTrie::Iterator it(*lexicon, prefix.c_str()); it.IsValid();
+ it.Advance()) {
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ memcpy(&posting_list_id, it.GetValue(), sizeof(posting_list_id));
+
+ // Posting list id might be invalid if this is also a backfill term.
+ // Suppose that the main index has two pre-existing prefix hits "foot" and
+ // "fool" - it will have a branch point posting list for "foo". Then, let's
+ // suppose that the other index adds hits for "foul", "four" and "far". This
+ // will result in branch points for "fo" and "f".
+ // If "fo" was added before "f", then the iterator would first give us "fo".
+ // "fo" will have an invalid posting_list_id because it hasn't been
+ // backfilled yet, so we need to continue iterating to "foo".
+ if (posting_list_id.is_valid()) {
+ exact = (prefix.size() == strlen(it.GetKey()));
+ tvi = it.GetValueIndex();
+ // Found it. Does it have prefix hits?
+ found = exact || hits_in_prefix_section.HasProperty(tvi);
+ break;
+ }
+ }
+ FindTermResult result = {tvi, found, exact};
+ return result;
+}
+
+std::string MakeFlashIndexFilename(const std::string& base_dir) {
+ return base_dir + "/main_index";
+}
+
+} // namespace
+
+MainIndex::MainIndex(const std::string& index_directory,
+ const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem)
+ : base_dir_(index_directory),
+ filesystem_(filesystem),
+ icing_filesystem_(icing_filesystem),
+ posting_list_hit_serializer_(
+ std::make_unique<PostingListHitSerializer>()) {}
+
+libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create(
+ const std::string& index_directory, const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(icing_filesystem);
+ std::unique_ptr<MainIndex> main_index(
+ new MainIndex(index_directory, filesystem, icing_filesystem));
+ ICING_RETURN_IF_ERROR(main_index->Init());
+ return main_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<int> MainIndex::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory) {
+ return FlashIndexStorage::ReadHeaderMagic(
+ filesystem, MakeFlashIndexFilename(index_directory));
+}
+
+// TODO(b/139087650) : Migrate off of IcingFilesystem.
+libtextclassifier3::Status MainIndex::Init() {
+ if (!filesystem_->CreateDirectoryRecursively(base_dir_.c_str())) {
+ return absl_ports::InternalError("Unable to create main index directory.");
+ }
+ std::string flash_index_file = MakeFlashIndexFilename(base_dir_);
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index,
+ FlashIndexStorage::Create(flash_index_file, filesystem_,
+ posting_list_hit_serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index));
+
+ std::string lexicon_file = base_dir_ + "/main-lexicon";
+ IcingDynamicTrie::RuntimeOptions runtime_options;
+ main_lexicon_ = std::make_unique<IcingDynamicTrie>(
+ lexicon_file, runtime_options, icing_filesystem_);
+ IcingDynamicTrie::Options lexicon_options;
+ if (!main_lexicon_->CreateIfNotExist(lexicon_options) ||
+ !main_lexicon_->Init()) {
+ return absl_ports::InternalError("Failed to initialize lexicon trie");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const {
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.main_index_storage_size() == -1 ||
+ storage_info.main_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of MainIndex's members.");
+ }
+ return storage_info.main_index_storage_size() +
+ storage_info.main_index_lexicon_size();
+}
+
+IndexStorageInfoProto MainIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ storage_info.set_main_index_lexicon_size(
+ IcingFilesystem::SanitizeFileSize(main_lexicon_->GetElementsSize()));
+ storage_info.set_main_index_storage_size(
+ Filesystem::SanitizeFileSize(flash_index_storage_->GetElementsSize()));
+ storage_info.set_main_index_block_size(flash_index_storage_->block_size());
+ storage_info.set_num_blocks(flash_index_storage_->num_blocks());
+ storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction());
+ return storage_info;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
+MainIndex::GetAccessorForExactTerm(const std::string& term) {
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ if (!main_lexicon_->Find(term.c_str(), &posting_list_id)) {
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Term %s is not present in main lexicon.", term.c_str()));
+ }
+ return PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
+ posting_list_id);
+}
+
+libtextclassifier3::StatusOr<MainIndex::GetPrefixAccessorResult>
+MainIndex::GetAccessorForPrefixTerm(const std::string& prefix) {
+ bool exact = false;
+ // For prefix indexing: when we are doing a prefix match for
+ // "prefix", find the tvi to the equivalent posting list. prefix's
+ // own posting list might not exist but its shortest child acts as a proxy.
+ //
+  // For example, if the only two hits in the index are prefix hits for
+ // "bar" and "bat", then both will appear on a posting list for "ba". "b"
+ // won't have a posting list, but "ba" will suffice.
+ IcingDynamicTrie::PropertyReader hits_in_prefix_section(
+ *main_lexicon_, GetHasHitsInPrefixSectionPropertyId());
+ IcingDynamicTrie::Iterator main_itr(*main_lexicon_, prefix.c_str());
+ if (!main_itr.IsValid()) {
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Term: %s is not present in the main lexicon.", prefix.c_str()));
+ }
+ exact = (prefix.length() == strlen(main_itr.GetKey()));
+
+ if (!exact && !hits_in_prefix_section.HasProperty(main_itr.GetValueIndex())) {
+ // Found it, but it doesn't have prefix hits. Exit early. No need to
+ // retrieve the posting list because there's nothing there for us.
+ return absl_ports::NotFoundError("The term doesn't have any prefix hits.");
+ }
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ memcpy(&posting_list_id, main_itr.GetValue(), sizeof(posting_list_id));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
+ posting_list_id));
+ return GetPrefixAccessorResult(std::move(pl_accessor), exact);
+}
+
+// TODO(tjbarron): Implement a method PropertyReadersAll.HasAnyProperty().
+bool IsTermInNamespaces(
+ const IcingDynamicTrie::PropertyReadersAll& property_reader,
+ uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) {
+ if (namespace_ids.empty()) {
+ return true;
+ }
+ for (NamespaceId namespace_id : namespace_ids) {
+ if (property_reader.HasProperty(GetNamespacePropertyId(namespace_id),
+ value_index)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+libtextclassifier3::StatusOr<std::vector<TermMetadata>>
+MainIndex::FindTermsByPrefix(
+ const std::string& prefix, TermMatchType::Code scoring_match_type,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker) {
+ // Finds all the terms that start with the given prefix in the lexicon.
+ IcingDynamicTrie::Iterator term_iterator(*main_lexicon_, prefix.c_str());
+
+ std::vector<TermMetadata> term_metadata_list;
+ while (term_iterator.IsValid()) {
+ int score = 0;
+ DocumentId last_document_id = kInvalidDocumentId;
+ bool is_last_document_in_desired = false;
+
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ memcpy(&posting_list_id, term_iterator.GetValue(), sizeof(posting_list_id));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
+ posting_list_id));
+ ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
+ pl_accessor->GetNextHitsBatch());
+ while (!hits.empty()) {
+ for (const Hit& hit : hits) {
+ // Check whether this Hit is desired.
+ DocumentId document_id = hit.document_id();
+ bool is_new_document = document_id != last_document_id;
+ if (is_new_document) {
+ last_document_id = document_id;
+ is_last_document_in_desired =
+ suggestion_result_checker->BelongsToTargetResults(
+ document_id, hit.section_id());
+ }
+ if (!is_last_document_in_desired) {
+          // The document was removed or has expired, or it does not belong
+          // to the target namespaces.
+ continue;
+ }
+ if (scoring_match_type == TermMatchType::EXACT_ONLY &&
+ hit.is_prefix_hit()) {
+ continue;
+ }
+
+ // Score the hit by the strategy
+ if (score_by ==
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE) {
+          // Give 1 to all matched terms and return them in arbitrary order
+ score = 1;
+ break;
+ } else if (score_by == SuggestionScoringSpecProto::
+ SuggestionRankingStrategy::DOCUMENT_COUNT &&
+ is_new_document) {
+ ++score;
+ } else if (score_by == SuggestionScoringSpecProto::
+ SuggestionRankingStrategy::TERM_FREQUENCY) {
+ if (hit.has_term_frequency()) {
+ score += hit.term_frequency();
+ } else {
+ ++score;
+ }
+ }
+ }
+ if (score_by ==
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE &&
+ score == 1) {
+ // The term is desired and no need to be scored.
+ break;
+ }
+ ICING_ASSIGN_OR_RETURN(hits, pl_accessor->GetNextHitsBatch());
+ }
+ if (score > 0) {
+ term_metadata_list.push_back(TermMetadata(term_iterator.GetKey(), score));
+ }
+
+ term_iterator.Advance();
+ }
+ return term_metadata_list;
+}
+
+libtextclassifier3::StatusOr<MainIndex::LexiconMergeOutputs>
+MainIndex::AddBackfillBranchPoints(const IcingDynamicTrie& other_lexicon) {
+  // Maps each new branching point in the main lexicon to the term such that
+  // branching_point_term is a prefix of term and no other term is greater
+  // than branching_point_term but smaller than term.
+ std::string prefix;
+ LexiconMergeOutputs outputs;
+ for (IcingDynamicTrie::Iterator other_term_itr(other_lexicon, /*prefix=*/"");
+ other_term_itr.IsValid(); other_term_itr.Advance()) {
+ // If term were inserted in the main lexicon, what new branching would it
+ // create? (It always creates at most one.)
+ int prefix_len = main_lexicon_->FindNewBranchingPrefixLength(
+ other_term_itr.GetKey(), /*utf8=*/true);
+ if (prefix_len <= 0) {
+ continue;
+ }
+ prefix.assign(other_term_itr.GetKey(), prefix_len);
+
+    // Figure out the backfill tvi. It might not exist, since all child terms
+    // might only contain hits from non-prefix sections.
+ //
+ // Ex. Suppose that the main lexicon contains "foot" and "fool" and that
+ // we're adding "foul". The new branching prefix will be "fo". The backfill
+ // prefix will be "foo" - all hits in prefix section on "foo" will need to
+ // be added to the new "fo" posting list later.
+ FindTermResult result =
+ FindShortestValidTermWithPrefixHits(main_lexicon_.get(), prefix);
+ if (!result.found || result.exact) {
+ continue;
+ }
+
+    // This is a new prefix that will need backfilling from its next-in-line
+    // posting list. This new prefix will eventually have to have a posting
+    // list, so insert PostingListIdentifier::kInvalid as a placeholder.
+ uint32_t branching_prefix_tvi;
+ bool new_key;
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ libtextclassifier3::Status status = main_lexicon_->Insert(
+ prefix.c_str(), &posting_list_id, &branching_prefix_tvi,
+ /*replace=*/false, &new_key);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Could not insert branching prefix\n"
+ << status.error_message();
+ return status;
+ }
+
+    // Backfill posting lists contain only prefix hits by default, so set
+    // these properties here; they may be overridden when hits from the other
+    // index are added later.
+ if (!main_lexicon_->SetProperty(branching_prefix_tvi,
+ GetHasNoExactHitsPropertyId()) ||
+ !main_lexicon_->SetProperty(branching_prefix_tvi,
+ GetHasHitsInPrefixSectionPropertyId())) {
+ return absl_ports::InternalError("Setting prefix prop failed");
+ }
+
+ outputs.backfill_map[branching_prefix_tvi] = result.tvi;
+ }
+ return outputs;
+}
+
+libtextclassifier3::StatusOr<MainIndex::LexiconMergeOutputs>
+MainIndex::AddTerms(const IcingDynamicTrie& other_lexicon,
+ LexiconMergeOutputs&& outputs) {
+ IcingDynamicTrie::PropertyReadersAll new_term_prop_readers(other_lexicon);
+ for (IcingDynamicTrie::Iterator other_term_itr(other_lexicon, /*prefix=*/"");
+ other_term_itr.IsValid(); other_term_itr.Advance()) {
+ uint32_t new_main_tvi;
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ libtextclassifier3::Status status = main_lexicon_->Insert(
+ other_term_itr.GetKey(), &posting_list_id, &new_main_tvi,
+ /*replace=*/false);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Could not insert term: " << other_term_itr.GetKey()
+ << "\n"
+ << status.error_message();
+ return status;
+ }
+
+ // Copy the properties from the other lexicon over to the main lexicon.
+ uint32_t other_tvi = other_term_itr.GetValueIndex();
+ if (!CopyProperties(new_term_prop_readers, other_lexicon, other_tvi,
+ new_main_tvi)) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Could not insert term: ", other_term_itr.GetKey()));
+ }
+
+ // Add other to main mapping.
+ outputs.other_tvi_to_main_tvi.emplace(other_tvi, new_main_tvi);
+
+ memcpy(&posting_list_id, main_lexicon_->GetValueAtIndex(new_main_tvi),
+ sizeof(posting_list_id));
+ if (posting_list_id.block_index() != kInvalidBlockIndex) {
+ outputs.main_tvi_to_block_index[new_main_tvi] =
+ posting_list_id.block_index();
+ }
+ }
+ return std::move(outputs);
+}
+
+libtextclassifier3::StatusOr<MainIndex::LexiconMergeOutputs>
+MainIndex::AddBranchPoints(const IcingDynamicTrie& other_lexicon,
+ LexiconMergeOutputs&& outputs) {
+ IcingDynamicTrie::PropertyReader has_prefix_prop_reader(
+ other_lexicon, GetHasHitsInPrefixSectionPropertyId());
+ if (!has_prefix_prop_reader.Exists()) {
+ return std::move(outputs);
+ }
+ std::string prefix;
+ for (IcingDynamicTrie::Iterator other_term_itr(other_lexicon, /*prefix=*/"");
+ other_term_itr.IsValid(); other_term_itr.Advance()) {
+ // Only expand terms that have hits in prefix sections.
+ if (!has_prefix_prop_reader.HasProperty(other_term_itr.GetValueIndex())) {
+ continue;
+ }
+
+ // Get prefixes where there is already a branching point in the main
+ // lexicon. We skip prefixes which don't already have a branching point.
+ std::vector<int> prefix_lengths = main_lexicon_->FindBranchingPrefixLengths(
+ other_term_itr.GetKey(), /*utf8=*/true);
+
+ int buf_start = outputs.prefix_tvis_buf.size();
+ // Add prefixes.
+ for (int prefix_length : prefix_lengths) {
+ if (prefix_length <= 0) {
+ continue;
+ }
+
+ prefix.assign(other_term_itr.GetKey(), prefix_length);
+ uint32_t prefix_tvi;
+ bool new_key;
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ libtextclassifier3::Status status =
+ main_lexicon_->Insert(prefix.c_str(), &posting_list_id, &prefix_tvi,
+ /*replace=*/false, &new_key);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Could not insert prefix: " << prefix << "\n"
+ << status.error_message();
+ return status;
+ }
+
+ // Prefix tvi will have hits in prefix section.
+ if (!main_lexicon_->SetProperty(prefix_tvi,
+ GetHasHitsInPrefixSectionPropertyId())) {
+ return absl_ports::InternalError(
+ "Setting has hits in prefix section prop failed");
+ }
+
+ // If it hasn't been added by non-prefix term insertions in
+ // AddBackfillBranchPoints and AddTerms, it is a prefix-only term.
+ if (new_key && !main_lexicon_->SetProperty(
+ prefix_tvi, GetHasNoExactHitsPropertyId())) {
+ return absl_ports::InternalError("Setting no exact hits prop failed");
+ }
+
+ outputs.prefix_tvis_buf.push_back(prefix_tvi);
+
+ memcpy(&posting_list_id, main_lexicon_->GetValueAtIndex(prefix_tvi),
+ sizeof(posting_list_id));
+ if (posting_list_id.block_index() != kInvalidBlockIndex) {
+ outputs.main_tvi_to_block_index[prefix_tvi] =
+ posting_list_id.block_index();
+ }
+ }
+
+ // Any prefixes added? Then add to map.
+ if (buf_start < outputs.prefix_tvis_buf.size()) {
+ outputs.other_tvi_to_prefix_main_tvis[other_term_itr.GetValueIndex()] = {
+ buf_start, outputs.prefix_tvis_buf.size() - buf_start};
+ }
+ }
+ return std::move(outputs);
+}
+
+bool MainIndex::CopyProperties(
+ const IcingDynamicTrie::PropertyReadersAll& prop_reader,
+ const IcingDynamicTrie& other_lexicon, uint32_t other_tvi,
+ uint32_t new_main_tvi) {
+ for (uint32_t property_id = 0; property_id < prop_reader.size();
+ ++property_id) {
+ if (property_id == GetHasNoExactHitsPropertyId()) {
+ // HasNoExactHitsProperty is an inverse. If other_lexicon has exact hits
+ // for this term, then HasNoExactHits needs to be set to false in
+ // main_lexicon. If other_lexicon has no exact hits for this term, then
+ // HasNoExactHits in the main_lexicon should not be modified.
+ if (!prop_reader.HasProperty(property_id, other_tvi) &&
+ !main_lexicon_->ClearProperty(new_main_tvi, property_id)) {
+ ICING_LOG(ERROR) << "Clearing HasNoExactHitsProperty failed";
+ return false;
+ }
+ } else {
+ // If other_lexicon has this property set for this term, then that
+ // property needs to be set for the main_lexicon. If other_lexicon
+ // doesn't have this property set, then the property in the main lexicon
+ // should not be modified.
+ if (prop_reader.HasProperty(property_id, other_tvi) &&
+ !main_lexicon_->SetProperty(new_main_tvi, property_id)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+libtextclassifier3::Status MainIndex::AddHits(
+ const TermIdCodec& term_id_codec,
+ std::unordered_map<uint32_t, uint32_t>&& backfill_map,
+ std::vector<TermIdHitPair>&& hits, DocumentId last_added_document_id) {
+ if (hits.empty()) {
+ flash_index_storage_->set_last_indexed_docid(last_added_document_id);
+ return libtextclassifier3::Status::OK;
+ }
+ uint32_t cur_term_id = hits[0].term_id();
+ ICING_ASSIGN_OR_RETURN(TermIdCodec::DecodedTermInfo cur_decoded_term,
+ term_id_codec.DecodeTermInfo(cur_term_id));
+ // Iterate through all hits. If these hits are for a term that also needs
+ // backfill, then backfill first and then add the new hits.
+ size_t k_start = 0;
+ size_t k_end = 0;
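+  // Ex. (illustrative) If hits were [(termA, doc2), (termA, doc1),
+  // (termB, doc2)], the inner loop below would advance k_end to 2, so
+  // [k_start, k_end) covers both termA hits before they are flushed via
+  // AddHitsForTerm.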
+ while (k_start < hits.size()) {
+ uint32_t term_id = hits[k_end].term_id();
+ while (term_id == cur_term_id && ++k_end < hits.size()) {
+ term_id = hits[k_end].term_id();
+ }
+
+ // Look for backfill.
+ PostingListIdentifier backfill_posting_list_id =
+ PostingListIdentifier::kInvalid;
+ auto itr = backfill_map.find(cur_decoded_term.tvi);
+ if (itr != backfill_map.end()) {
+ const void* value = main_lexicon_->GetValueAtIndex(itr->second);
+ memcpy(&backfill_posting_list_id, value,
+ sizeof(backfill_posting_list_id));
+ backfill_map.erase(itr);
+ }
+ ICING_RETURN_IF_ERROR(AddHitsForTerm(cur_decoded_term.tvi,
+ backfill_posting_list_id,
+ &hits[k_start], k_end - k_start));
+ cur_term_id = term_id;
+ ICING_ASSIGN_OR_RETURN(cur_decoded_term,
+ term_id_codec.DecodeTermInfo(cur_term_id));
+ k_start = k_end;
+ }
+
+ // Now copy remaining backfills.
+ ICING_VLOG(1) << "Remaining backfills " << backfill_map.size();
+  for (const auto& other_tvi_main_tvi_pair : backfill_map) {
+ PostingListIdentifier backfill_posting_list_id =
+ PostingListIdentifier::kInvalid;
+ memcpy(&backfill_posting_list_id,
+ main_lexicon_->GetValueAtIndex(other_tvi_main_tvi_pair.second),
+ sizeof(backfill_posting_list_id));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListHitAccessor> hit_accum,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ posting_list_hit_serializer_.get()));
+ ICING_RETURN_IF_ERROR(
+ AddPrefixBackfillHits(backfill_posting_list_id, hit_accum.get()));
+ PostingListAccessor::FinalizeResult result =
+ std::move(*hit_accum).Finalize();
+ if (result.id.is_valid()) {
+ main_lexicon_->SetValueAtIndex(other_tvi_main_tvi_pair.first, &result.id);
+ }
+ }
+ flash_index_storage_->set_last_indexed_docid(last_added_document_id);
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status MainIndex::AddHitsForTerm(
+ uint32_t tvi, PostingListIdentifier backfill_posting_list_id,
+ const TermIdHitPair* hit_elements, size_t len) {
+ // 1. Create a PostingListHitAccessor - either from the pre-existing block, if
+ // one exists, or from scratch.
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ memcpy(&posting_list_id, main_lexicon_->GetValueAtIndex(tvi),
+ sizeof(posting_list_id));
+ std::unique_ptr<PostingListHitAccessor> pl_accessor;
+ if (posting_list_id.is_valid()) {
+ if (posting_list_id.block_index() >= flash_index_storage_->num_blocks()) {
+ ICING_LOG(ERROR) << "Index dropped hits. Invalid block index "
+ << posting_list_id.block_index()
+ << " >= " << flash_index_storage_->num_blocks();
+ // TODO(b/159918304) : Consider revising the checksumming strategy in the
+ // main index. Providing some mechanism to check for corruption - either
+ // during initialization or some later time would allow us to avoid
+ // whack-a-mole with odd corruption issues like this one (b/62820689).
+ return absl_ports::InternalError(
+ "Valid posting list has an invalid block index!");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor, PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(),
+ posting_list_hit_serializer_.get(), posting_list_id));
+ } else {
+ // New posting list.
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ posting_list_hit_serializer_.get()));
+ }
+
+ // 2. Backfill any hits if necessary.
+ if (backfill_posting_list_id.is_valid()) {
+ ICING_RETURN_IF_ERROR(
+ AddPrefixBackfillHits(backfill_posting_list_id, pl_accessor.get()));
+ }
+
+ // 3. Add all the new hits.
+ for (int i = len - 1; i >= 0; --i) {
+ Hit hit = hit_elements[i].hit();
+ ICING_RETURN_IF_ERROR(pl_accessor->PrependHit(hit));
+ }
+
+ // 4. Finalize this posting list and put its identifier in the lexicon.
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ if (result.id.is_valid()) {
+ main_lexicon_->SetValueAtIndex(tvi, &result.id);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status MainIndex::AddPrefixBackfillHits(
+ PostingListIdentifier backfill_posting_list_id,
+ PostingListHitAccessor* hit_accum) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListHitAccessor> backfill_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
+ backfill_posting_list_id));
+ std::vector<Hit> backfill_hits;
+ ICING_ASSIGN_OR_RETURN(std::vector<Hit> tmp,
+ backfill_accessor->GetNextHitsBatch());
+ while (!tmp.empty()) {
+ std::copy(tmp.begin(), tmp.end(), std::back_inserter(backfill_hits));
+ ICING_ASSIGN_OR_RETURN(tmp, backfill_accessor->GetNextHitsBatch());
+ }
+
+ Hit last_added_hit;
+ // The hits in backfill_hits are in the reverse order of how they were added.
+ // Iterate in reverse to add them to this new posting list in the correct
+ // order.
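+  // Ex. (illustrative) Hits (sec=1, doc=5, tf=1, is_prefix=false) and
+  // (sec=1, doc=5, tf=1, is_prefix=true) both become
+  // (sec=1, doc=5, tf=1, is_prefix=true) once the flag is overridden below,
+  // so the second copy would be skipped as a duplicate.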
+ for (auto itr = backfill_hits.rbegin(); itr != backfill_hits.rend(); ++itr) {
+ const Hit& hit = *itr;
+ // Skip hits from non-prefix-enabled sections.
+ if (!hit.is_in_prefix_section()) {
+ continue;
+ }
+
+ // A backfill hit is a prefix hit in a prefix section.
+ const Hit backfill_hit(hit.section_id(), hit.document_id(),
+ hit.term_frequency(),
+ /*is_in_prefix_section=*/true,
+ /*is_prefix_hit=*/true);
+ if (backfill_hit == last_added_hit) {
+ // Skip duplicate values due to overriding of the is_prefix flag.
+ continue;
+ }
+ last_added_hit = backfill_hit;
+ ICING_RETURN_IF_ERROR(hit_accum->PrependHit(backfill_hit));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+std::string MainIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) const {
+ std::string res;
+
+ // Lexicon.
+ std::string lexicon_info;
+ main_lexicon_->GetDebugInfo(verbosity, &lexicon_info);
+
+ IcingStringUtil::SStringAppendF(&res, 0,
+ "last_added_document_id: %u\n"
+ "\n"
+ "main_lexicon_info:\n%s\n",
+ last_added_document_id(),
+ lexicon_info.c_str());
+
+ if (verbosity == DebugInfoVerbosity::BASIC) {
+ return res;
+ }
+
+ std::string flash_index_storage_info;
+ flash_index_storage_->GetDebugInfo(verbosity, &flash_index_storage_info);
+ IcingStringUtil::SStringAppendF(&res, 0, "flash_index_storage_info:\n%s\n",
+ flash_index_storage_info.c_str());
+ return res;
+}
+
+libtextclassifier3::Status MainIndex::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new) {
+ std::string temporary_index_dir_path = base_dir_ + "_temp";
+ if (!filesystem_->DeleteDirectoryRecursively(
+ temporary_index_dir_path.c_str())) {
+ ICING_LOG(ERROR) << "Recursively deleting " << temporary_index_dir_path;
+ return absl_ports::InternalError(
+ "Unable to delete temp directory to prepare to build new index.");
+ }
+
+ DestructibleDirectory temporary_index_dir(
+ filesystem_, std::move(temporary_index_dir_path));
+ if (!temporary_index_dir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new index.");
+ }
+
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<MainIndex> new_index,
+ MainIndex::Create(temporary_index_dir.dir(),
+ filesystem_, icing_filesystem_));
+ ICING_RETURN_IF_ERROR(TransferIndex(document_id_old_to_new, new_index.get()));
+ ICING_RETURN_IF_ERROR(new_index->PersistToDisk());
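+  // Release the new and current index instances before swapping the
+  // directories; Init() below reopens the index from the swapped-in files.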
+ new_index = nullptr;
+ flash_index_storage_ = nullptr;
+ main_lexicon_ = nullptr;
+
+ if (!filesystem_->SwapFiles(temporary_index_dir.dir().c_str(),
+ base_dir_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new index due to failed swap!");
+ }
+
+ // Reinitialize the index so that flash_index_storage_ and main_lexicon_ are
+ // properly updated.
+ return Init();
+}
+
+libtextclassifier3::StatusOr<DocumentId> MainIndex::TransferAndAddHits(
+ const std::vector<DocumentId>& document_id_old_to_new, const char* term,
+ PostingListHitAccessor& old_pl_accessor, MainIndex* new_index) {
+ std::vector<Hit> new_hits;
+ bool has_no_exact_hits = true;
+ bool has_hits_in_prefix_section = false;
+ // The largest document id after translating hits.
+ DocumentId largest_document_id = kInvalidDocumentId;
+ ICING_ASSIGN_OR_RETURN(std::vector<Hit> tmp,
+ old_pl_accessor.GetNextHitsBatch());
+ while (!tmp.empty()) {
+ for (const Hit& hit : tmp) {
+      // A safety check to make sure that we never access invalid memory, in
+      // case the hit from the posting list is corrupted.
+ if (hit.document_id() < 0 ||
+ hit.document_id() >= document_id_old_to_new.size()) {
+ continue;
+ }
+ DocumentId new_document_id = document_id_old_to_new[hit.document_id()];
+ // Transfer the document id of the hit, if the document is not deleted
+ // or outdated.
+ if (new_document_id != kInvalidDocumentId) {
+ if (hit.is_in_prefix_section()) {
+ has_hits_in_prefix_section = true;
+ }
+ if (!hit.is_prefix_hit()) {
+ has_no_exact_hits = false;
+ }
+ if (largest_document_id == kInvalidDocumentId ||
+ new_document_id > largest_document_id) {
+ largest_document_id = new_document_id;
+ }
+ new_hits.push_back(Hit::TranslateHit(hit, new_document_id));
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(tmp, old_pl_accessor.GetNextHitsBatch());
+ }
+  // A term without exact hits indicates that it is a purely backfill term. If
+  // the term is not a branching term in the new trie, backfilling is no
+  // longer necessary, so we can skip it.
+ if (new_hits.empty() ||
+ (has_no_exact_hits && !new_index->main_lexicon_->IsBranchingTerm(term))) {
+ return largest_document_id;
+ }
+
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<PostingListHitAccessor> hit_accum,
+ PostingListHitAccessor::Create(
+ new_index->flash_index_storage_.get(),
+ new_index->posting_list_hit_serializer_.get()));
+ for (auto itr = new_hits.rbegin(); itr != new_hits.rend(); ++itr) {
+ ICING_RETURN_IF_ERROR(hit_accum->PrependHit(*itr));
+ }
+ PostingListAccessor::FinalizeResult result = std::move(*hit_accum).Finalize();
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to add translated hits for term: ", term));
+ }
+ uint32_t tvi;
+ libtextclassifier3::Status status =
+ new_index->main_lexicon_->Insert(term, &result.id, &tvi,
+ /*replace=*/false);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Could not transfer main index for term: " << term << "\n"
+ << status.error_message();
+ return status;
+ }
+ if (has_no_exact_hits && !new_index->main_lexicon_->SetProperty(
+ tvi, GetHasNoExactHitsPropertyId())) {
+ return absl_ports::InternalError("Setting prefix prop failed");
+ }
+ if (has_hits_in_prefix_section &&
+ !new_index->main_lexicon_->SetProperty(
+ tvi, GetHasHitsInPrefixSectionPropertyId())) {
+ return absl_ports::InternalError("Setting prefix prop failed");
+ }
+ return largest_document_id;
+}
+
+libtextclassifier3::Status MainIndex::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ MainIndex* new_index) {
+ DocumentId largest_document_id = kInvalidDocumentId;
+ for (IcingDynamicTrie::Iterator term_itr(*main_lexicon_, /*prefix=*/"",
+ /*reverse=*/true);
+ term_itr.IsValid(); term_itr.Advance()) {
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ memcpy(&posting_list_id, term_itr.GetValue(), sizeof(posting_list_id));
+ if (posting_list_id == PostingListIdentifier::kInvalid) {
+      // This is unexpected, but skip the term rather than failing the
+      // whole transfer.
+ ICING_LOG(ERROR)
+ << "Got invalid posting_list_id from previous main index";
+ continue;
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_hit_serializer_.get(),
+ posting_list_id));
+ ICING_ASSIGN_OR_RETURN(
+ DocumentId curr_largest_document_id,
+ TransferAndAddHits(document_id_old_to_new, term_itr.GetKey(),
+ *pl_accessor, new_index));
+ if (curr_largest_document_id == kInvalidDocumentId) {
+ continue;
+ }
+ if (largest_document_id == kInvalidDocumentId ||
+ curr_largest_document_id > largest_document_id) {
+ largest_document_id = curr_largest_document_id;
+ }
+ }
+ new_index->flash_index_storage_->set_last_indexed_docid(largest_document_id);
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
new file mode 100644
index 0000000..9e570d5
--- /dev/null
+++ b/icing/index/main/main-index.h
@@ -0,0 +1,350 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_MAIN_MAIN_INDEX_H_
+#define ICING_INDEX_MAIN_MAIN_INDEX_H_
+
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/main/posting-list-hit-accessor.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/index/term-metadata.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+class MainIndex {
+ public:
+ // RETURNS:
+ // - valid instance of MainIndex, on success.
+ // - INTERNAL error if unable to create the lexicon or flash storage.
+ static libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> Create(
+ const std::string& index_directory, const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem);
+
+  // Reads the magic from the existing flash index storage file header. We
+  // need this during the Icing initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic.
+ // - NOT_FOUND if the flash index doesn't exist.
+ // - INTERNAL on I/O error.
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory);
+
+ // Get a PostingListHitAccessor that holds the posting list chain for 'term'.
+ //
+ // RETURNS:
+ // - On success, a valid PostingListHitAccessor
+ // - NOT_FOUND if term is not present in the main index.
+ libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
+ GetAccessorForExactTerm(const std::string& term);
+
+ // Get a PostingListHitAccessor for 'prefix'.
+ //
+ // RETURNS:
+ // - On success, a result containing a valid PostingListHitAccessor.
+ // - NOT_FOUND if neither 'prefix' nor any terms for which 'prefix' is a
+ // prefix are present in the main index.
+ struct GetPrefixAccessorResult {
+ // A PostingListHitAccessor that holds the posting list chain for the term
+ // that best represents 'prefix' in the main index.
+ std::unique_ptr<PostingListHitAccessor> accessor;
+ // True if the returned posting list chain is for 'prefix' or false if the
+ // returned posting list chain is for a term for which 'prefix' is a prefix.
+ bool exact;
+
+ explicit GetPrefixAccessorResult(
+ std::unique_ptr<PostingListHitAccessor> accessor_in, bool exact_in)
+ : accessor(std::move(accessor_in)), exact(exact_in) {}
+ };
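+  // Ex. (illustrative) If the main lexicon contains the term "foo" itself,
+  // the returned chain is for "foo" and exact is true; if it only contains
+  // "foot", the returned chain is for "foot" and exact is false.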
+ libtextclassifier3::StatusOr<GetPrefixAccessorResult>
+ GetAccessorForPrefixTerm(const std::string& prefix);
+
+  // Finds all terms with the given prefix that are accepted by the given
+  // result checker. The input prefix must be normalized, otherwise inaccurate
+  // results may be returned. If scoring_match_type is EXACT, only exact hits
+  // are counted; if it is PREFIX, both prefix and exact hits are counted.
+  // Results are not sorted by score and are returned in lexicographical
+  // order.
+ //
+ // Returns:
+ // A list of TermMetadata on success
+ // INTERNAL_ERROR if failed to access term data.
+ libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix(
+ const std::string& prefix, TermMatchType::Code scoring_match_type,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker);
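+
+  // Ex. (illustrative) If the main index contains "foot" and "fool",
+  // FindTermsByPrefix("fo", TermMatchType::PREFIX,
+  // SuggestionRankingStrategy::DOCUMENT_COUNT, checker) would return
+  // TermMetadata for both terms, each scored by the number of documents
+  // accepted by the checker.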
+
+ struct LexiconMergeOutputs {
+ // Maps from main_lexicon tvi for new branching point to the main_lexicon
+ // tvi for posting list whose hits must be backfilled.
+ std::unordered_map<uint32_t, uint32_t> backfill_map;
+
+ // Maps from lexicon tvis to main_lexicon tvis.
+ std::unordered_map<uint32_t, uint32_t> other_tvi_to_main_tvi;
+
+ // Maps from main lexicon tvi to the block index. Tvis with no entry do not
+ // have an allocated posting list.
+ std::unordered_map<uint32_t, int> main_tvi_to_block_index;
+
+ // Maps from the lexicon tvi to the beginning position in
+ // prefix_tvis_buf and the length.
+ std::unordered_map<uint32_t, std::pair<int, int>>
+ other_tvi_to_prefix_main_tvis;
+
+    // Stores the tvis that are mapped to by other_tvi_to_prefix_main_tvis.
+ std::vector<uint32_t> prefix_tvis_buf;
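+
+    // Ex. (illustrative) If other tvi 7 expanded to prefix main tvis 21 and
+    // 22, stored at positions 4 and 5 of prefix_tvis_buf, then
+    // other_tvi_to_prefix_main_tvis[7] == {4, 2} (start position, length).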
+ };
+
+  // Merge the other lexicon into the main lexicon and populate the data
+  // structures necessary to translate lite tvis to main tvis, track
+  // backfilling, and expand lite terms to prefix terms.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL on IO error while writing to the main lexicon.
+ libtextclassifier3::StatusOr<LexiconMergeOutputs> MergeLexicon(
+ const IcingDynamicTrie& other_lexicon) {
+ // Backfill branch points need to be added first so that the backfill_map
+ // can be correctly populated.
+ ICING_ASSIGN_OR_RETURN(LexiconMergeOutputs outputs,
+ AddBackfillBranchPoints(other_lexicon));
+ ICING_ASSIGN_OR_RETURN(outputs,
+ AddTerms(other_lexicon, std::move(outputs)));
+ // Non-backfill branch points need to be added last so that the mapping of
+ // newly added terms to prefix terms can be correctly populated (prefix
+ // terms might be branch points between two new terms or between a
+ // pre-existing term and a new term).
+ ICING_ASSIGN_OR_RETURN(outputs,
+ AddBranchPoints(other_lexicon, std::move(outputs)));
+ return outputs;
+ }
+
+ // Add hits to the main index and backfill from existing posting lists to new
+ // backfill branch points.
+ //
+ // The backfill_map maps from main_lexicon tvi for a newly added branching
+ // point to the main_lexicon tvi for the posting list whose hits must be
+ // backfilled. backfill_map should be populated as part of LexiconMergeOutputs
+ // in MergeLexicon and be blindly passed to this function.
+ //
+ // RETURNS:
+ // - OK on success
+  // - INVALID_ARGUMENT if one of the elements in the lite index has a term_id
+  // that exceeds the max TermId, is not valid, or is not less than
+  // pre-existing hits in the main index.
+ // - INTERNAL_ERROR if unable to mmap necessary IndexBlocks
+ // - RESOURCE_EXHAUSTED error if unable to grow the index
+ libtextclassifier3::Status AddHits(
+ const TermIdCodec& term_id_codec,
+ std::unordered_map<uint32_t, uint32_t>&& backfill_map,
+ std::vector<TermIdHitPair>&& hits, DocumentId last_added_document_id);
+
+ libtextclassifier3::Status PersistToDisk() {
+ if (main_lexicon_->Sync() && flash_index_storage_->PersistToDisk()) {
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InternalError("Unable to sync main index components.");
+ }
+
+ DocumentId last_added_document_id() const {
+ return flash_index_storage_->get_last_indexed_docid();
+ }
+
+ libtextclassifier3::Status Reset() {
+ ICING_RETURN_IF_ERROR(flash_index_storage_->Reset());
+ main_lexicon_->Clear();
+ return libtextclassifier3::Status::OK;
+ }
+
+ void Warm() { main_lexicon_->Warm(); }
+
+ // Returns:
+ // - elements size of lexicon and index, on success
+ // - INTERNAL on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+
+ // Takes the provided storage_info, populates the fields related to the main
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
+  // Returns debug information for the main index.
+  // verbosity = BASIC: simplest debug information - just the lexicon.
+  // verbosity = DETAILED: more detailed debug information, including raw
+  // posting lists.
+ std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity) const;
+
+ // Reduces internal file sizes by reclaiming space of deleted documents.
+ //
+ // This method will update the last_added_docid of the index to the largest
+ // document id that still appears in the index after compaction.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error, this indicates that the index may be in an
+ // invalid state and should be cleared.
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new);
+
+ private:
+ explicit MainIndex(const std::string& index_directory,
+ const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem);
+
+ libtextclassifier3::Status Init();
+
+  // Helpers for merging the lexicon.
+  // Add all 'backfill' branch points. Backfill branch points are prefix
+  // branch points that are a prefix of terms that existed in the lexicon
+  // prior to the merge.
+ //
+ // For example, if the main lexicon only contains "foot" and is then merged
+ // with a lite lexicon containing only "fool", then a backfill branch point
+ // for "foo" will be added to contain prefix hits from both the pre-existing
+ // posting list for "foot" and the new posting list for "fool".
+ //
+ // Populates LexiconMergeOutputs.backfill_map
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL on IO error while writing to the main lexicon.
+ libtextclassifier3::StatusOr<LexiconMergeOutputs> AddBackfillBranchPoints(
+ const IcingDynamicTrie& other_lexicon);
+
+  // Add all terms from the other lexicon.
+ //
+ // Populates LexiconMergeOutputs.other_tvi_to_main_tvi
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL on IO error while writing to the main lexicon.
+ libtextclassifier3::StatusOr<LexiconMergeOutputs> AddTerms(
+ const IcingDynamicTrie& other_lexicon, LexiconMergeOutputs&& outputs);
+
+  // Add all branch points for terms added from the other lexicon.
+ // For example, if the main lexicon is empty and is then merged with a
+ // lexicon containing only "foot" and "fool", then a branch point for "foo"
+ // will be added to contain prefix hits from both "foot" and "fool".
+ //
+ // Populates LexiconMergeOutputs.other_tvi_to_prefix_main_tvis and
+ // LexiconMergeOutputs.prefix_tvis_buf;
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL on IO error while writing to the main lexicon.
+ libtextclassifier3::StatusOr<LexiconMergeOutputs> AddBranchPoints(
+ const IcingDynamicTrie& other_lexicon, LexiconMergeOutputs&& outputs);
+
+  // Copies all properties from other_tvi in the other lexicon to new_main_tvi
+  // in the main lexicon.
+ // Returns true on success, false if an IO error is encountered.
+ bool CopyProperties(const IcingDynamicTrie::PropertyReadersAll& prop_reader,
+ const IcingDynamicTrie& other_lexicon, uint32_t other_tvi,
+ uint32_t new_main_tvi);
+
+ // Add all hits between [hit_elements, hit_elements + len) to main_index,
+ // updating the entry in the main lexicon at trie_value_index to point to the
+  // resulting posting list. Hits are sorted in descending document id order,
+  // so they should be added to posting lists in reverse, starting at
+  // hit_elements + len - 1 and working backwards. Therefore, hit_elements
+  // must be in sorted order.
+ //
+ // trie_value_index may point to a valid posting list id if there is a
+ // pre-existing posting list to append to.
+ //
+ // If backfill_posting_list_id is valid, then the hits from the posting list
+ // identified by backfill_posting_list_id should be added to the new posting
+ // list before the hits in hit_elements.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT if posting_list_id stored at trie_value_index is valid
+ // but points out of bounds in the IndexBlock referred to by
+ // id.block_index(), if one of the hits from [hit_elements,hit_elements+len)
+ // is not valid, or if one of the hits from [hit_elements,hit_elements+len)
+ // is not less than the previously added hits.
+ // - INTERNAL_ERROR if posting_list_id stored at trie_value_index is valid
+ // but points to an invalid block index or if unable to mmap the IndexBlock.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status AddHitsForTerm(
+ uint32_t tvi, PostingListIdentifier backfill_posting_list_id,
+ const TermIdHitPair* hit_elements, size_t len);
+
+ // Adds all prefix hits or hits from prefix sections present on the posting
+ // list identified by backfill_posting_list_id to hit_accum.
+ //
+ // RETURNS:
+ // - OK, on success
+ // - INVALID_ARGUMENT if backfill_posting_list_id points out of bounds in the
+ // IndexBlock referred to by id.block_index()
+ // - INTERNAL_ERROR if unable to mmap the block identified by
+ // backfill_posting_list_id or if the posting list identified by
+ // backfill_posting_list_id has been corrupted.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status AddPrefixBackfillHits(
+ PostingListIdentifier backfill_posting_list_id,
+ PostingListHitAccessor* hit_accum);
+
+ // Transfer hits from old_pl_accessor to new_index for term.
+ //
+ // Returns:
+ // largest document id added to the translated posting list, on success
+ // INTERNAL_ERROR on IO error
+ static libtextclassifier3::StatusOr<DocumentId> TransferAndAddHits(
+ const std::vector<DocumentId>& document_id_old_to_new, const char* term,
+ PostingListHitAccessor& old_pl_accessor, MainIndex* new_index);
+
+ // Transfer hits from the current main index to new_index.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ MainIndex* new_index);
+
+ std::string base_dir_;
+ const Filesystem* filesystem_;
+ const IcingFilesystem* icing_filesystem_;
+ std::unique_ptr<PostingListHitSerializer> posting_list_hit_serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+ std::unique_ptr<IcingDynamicTrie> main_lexicon_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_MAIN_MAIN_INDEX_H_
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
new file mode 100644
index 0000000..fa96e6c
--- /dev/null
+++ b/icing/index/main/main-index_test.cc
@@ -0,0 +1,710 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/main-index.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/main/doc-hit-info-iterator-term-main.h"
+#include "icing/index/main/main-index-merger.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/index/term-property-id.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/schema/section.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::NiceMock;
+using ::testing::Return;
+using ::testing::SizeIs;
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+std::vector<DocHitInfo> GetExactHits(
+ MainIndex* main_index, int term_start_index, int unnormalized_term_length,
+ const std::string& term, SectionIdMask section_mask = kSectionIdMaskAll) {
+ auto iterator = std::make_unique<DocHitInfoIteratorTermMainExact>(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_mask, /*need_hit_term_frequency=*/true);
+ return GetHits(std::move(iterator));
+}
+
+std::vector<DocHitInfo> GetPrefixHits(
+ MainIndex* main_index, int term_start_index, int unnormalized_term_length,
+ const std::string& term, SectionIdMask section_mask = kSectionIdMaskAll) {
+ auto iterator = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
+ main_index, term, term_start_index, unnormalized_term_length,
+ section_mask, /*need_hit_term_frequency=*/true);
+ return GetHits(std::move(iterator));
+}
+
+libtextclassifier3::Status Merge(const LiteIndex& lite_index,
+ const TermIdCodec& term_id_codec,
+ MainIndex* main_index) {
+ ICING_ASSIGN_OR_RETURN(MainIndex::LexiconMergeOutputs outputs,
+ main_index->MergeLexicon(lite_index.lexicon()));
+ ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(
+ lite_index, term_id_codec, outputs));
+ return main_index->AddHits(term_id_codec, std::move(outputs.backfill_map),
+ std::move(term_id_hit_pairs),
+ lite_index.last_added_document_id());
+}
+
+class MainIndexTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ index_dir_ = GetTestTempDir() + "/test_dir";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
+
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+ }
+
+ void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ std::string index_dir_;
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+constexpr NamespaceId kNamespace0 = 0;
+
+TEST_F(MainIndexTest, MainIndexCreateIOFailure) {
+ // Create the index with mock filesystem. By default, Mock will return false,
+ // so the first attempted file operation will fail.
+ NiceMock<IcingMockFilesystem> mock_icing_filesystem;
+ ON_CALL(mock_icing_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(Return(false));
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ EXPECT_THAT(MainIndex::Create(main_index_file_name, &filesystem_,
+ &mock_icing_filesystem),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixTermNotFound) {
+ // Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+ EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsValidAccessor) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"foot" is_in_prefix_section=true}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+  // 3. Merge the index. The main index should contain "foot".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ // GetAccessorForPrefixTerm should return a valid accessor for "foo".
+ EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"), IsOk());
+}
+
+TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsNotFound) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"foot" is_in_prefix_section=false}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+  // 3. Merge the index. The main index should return NOT_FOUND when we
+  // search for the prefix "foo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+  // GetAccessorForPrefixTerm should return NOT_FOUND for "foo".
+ EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(MainIndexTest, MainIndexGetAccessorForExactTermNotFound) {
+ // Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+ EXPECT_THAT(main_index->GetAccessorForExactTerm("foo"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(MainIndexTest, MainIndexGetAccessorForExactReturnsValidAccessor) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"foo" is_in_prefix_section=false}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the index. The main index should contain "foo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+  // GetAccessorForExactTerm should return a valid accessor for "foo".
+ EXPECT_THAT(main_index->GetAccessorForExactTerm("foo"), IsOk());
+}
+
+TEST_F(MainIndexTest, MergeIndexToEmpty) {
+ // 1. Index three docs in the Lite Index:
+ // - Doc0 {"foot", "fool", "far" is_in_prefix_section=false}
+ // - Doc1 {"foot", "fool" is_in_prefix_section=true}
+ // - Doc2 {"fool", "far" is_in_prefix_section=false}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi, lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("far", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
+
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
+
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
+ EXPECT_THAT(hits, IsEmpty());
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
+ EXPECT_THAT(hits, IsEmpty());
+
+ // 3. Merge the index. The main index should contain "fool", "foot"
+ // and "far" as well as a branch points for "foo" and "f". "fa" and "fo"
+ // should not be present because it is not a branch point.
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ // Get hits from an exact posting list.
+ hits = GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
+ // We should get hits for "foot" in doc1 and doc0
+ EXPECT_THAT(
+ hits,
+ ElementsAre(
+ EqualsDocHitInfo(doc1_hit.document_id(),
+ std::vector<SectionId>{doc1_hit.section_id()}),
+ EqualsDocHitInfo(doc0_hit.document_id(),
+ std::vector<SectionId>{doc0_hit.section_id()})));
+
+ // Get hits from a branching point posting list. "fo" should redirect to "foo"
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
+ // We should get hits for "foot" in doc1 and "fool" in doc1. We shouldn't get
+ // the hits for "foot" in doc0 and "fool" in doc0 and doc2 because they
+ // weren't hits in prefix sections.
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ doc1_hit.document_id(),
+ std::vector<SectionId>{doc1_hit.section_id()})));
+}
+
+TEST_F(MainIndexTest, MergeIndexToPreexisting) {
+ // 1. Index three docs in the Lite Index:
+ // - Doc0 {"foot", "fool", "far" is_in_prefix_section=false}
+ // - Doc1 {"foot", "fool" is_in_prefix_section=true}
+ // - Doc2 {"fool", "far" is_in_prefix_section=false}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi, lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("far", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
+
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
+
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the index. The main index should contain "fool", "foot"
+ // and "far" as well as a branch points for "foo" and "f". "fa" and "fo"
+ // should not be present because it is not a branch point.
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+
+ // 4. Index two docs in a new Lite Index:
+ // - Doc3 {"foot", "four", "foul", "fall" is_in_prefix_section=false}
+ // - Doc4 {"four", "foul" is_in_prefix_section=true}
+ std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
+ LiteIndex::Options options(lite_index_file_name2,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi, lite_index_->InsertTerm("four", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t four_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi, lite_index_->InsertTerm("foul", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foul_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("fall", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t fall_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc3_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc3_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc3_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(fall_term_id, doc3_hit));
+
+ Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc4_hit));
+ ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc4_hit));
+
+  // 5. Merge the index. The main index should now contain "foul", "four"
+  // and "fall", a branch point for "fou" and a backfill point for "fo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+  // Get hits from an exact posting list that existed before the merge.
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foot");
+
+ // We should get hits for "foot" in doc3, doc1 and doc0
+ EXPECT_THAT(
+ hits,
+ ElementsAre(
+ EqualsDocHitInfo(doc3_hit.document_id(),
+ std::vector<SectionId>{doc3_hit.section_id()}),
+ EqualsDocHitInfo(doc1_hit.document_id(),
+ std::vector<SectionId>{doc1_hit.section_id()}),
+ EqualsDocHitInfo(doc0_hit.document_id(),
+ std::vector<SectionId>{doc0_hit.section_id()})));
+ // Get hits from backfill posting list.
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "fo");
+ // We should get hits for "four" and "foul" in doc4 and hits for "foot" and
+ // "fool" in doc1. We shouldn't get the hits for "foot" in doc0 and doc3,
+ // "fool" in doc0 and doc2 or the hits for "four" and "foul" in doc4 because
+ // they weren't hits in prefix sections.
+ EXPECT_THAT(
+ hits,
+ ElementsAre(
+ EqualsDocHitInfo(doc4_hit.document_id(),
+ std::vector<SectionId>{doc4_hit.section_id()}),
+ EqualsDocHitInfo(doc1_hit.document_id(),
+ std::vector<SectionId>{doc1_hit.section_id()})));
+}
+
+TEST_F(MainIndexTest, ExactRetrievedInPrefixSearch) {
+  // 1. Index three docs in the Lite Index:
+ // - Doc0 {"foot" is_in_prefix_section=true}
+ // - Doc1 {"foo" is_in_prefix_section=false}
+ // - Doc2 {"foot" is_in_prefix_section=false}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
+
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc2_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the lite lexicon. The main lexicon should contain "foot" and
+ // "foo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ std::vector<DocHitInfo> hits =
+ GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
+ // We should get hits for "foo" in doc1 and doc0, but not in doc2 because it
+ // is not a prefix hit.
+ EXPECT_THAT(
+ hits,
+ ElementsAre(
+ EqualsDocHitInfo(doc1_hit.document_id(),
+ std::vector<SectionId>{doc1_hit.section_id()}),
+ EqualsDocHitInfo(doc0_hit.document_id(),
+ std::vector<SectionId>{doc0_hit.section_id()})));
+}
+
+TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
+  // 1. Index three docs in the Lite Index:
+  // - Doc0 {"foot" is_in_prefix_section=true}
+  // - Doc1 {"foo" is_in_prefix_section=false}
+  // - Doc2 {"foo" is_in_prefix_section=true}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi, lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
+
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
+
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc2_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the lite lexicon. The main lexicon should contain "foot" and
+ // "foo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
+
+ // We should get hits for "foo" in doc2 and doc1, but not in doc0 because it
+ // is not an exact hit.
+ EXPECT_THAT(
+ hits,
+ ElementsAre(
+ EqualsDocHitInfo(doc2_hit.document_id(),
+ std::vector<SectionId>{doc2_hit.section_id()}),
+ EqualsDocHitInfo(doc1_hit.document_id(),
+ std::vector<SectionId>{doc1_hit.section_id()})));
+}
+
+TEST_F(MainIndexTest,
+ SearchChainedPostingListsShouldMergeSectionsAndTermFrequency) {
+  // Index 2048 documents with 3 hits in each document. When merged into the main
+ // index, this will 1) lead to a chained posting list and 2) split at least
+ // one document's hits across multiple posting lists.
+ const std::string term = "foot";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(term, TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ for (DocumentId document_id = 0; document_id < 2048; ++document_id) {
+ Hit::TermFrequency term_frequency = static_cast<Hit::TermFrequency>(
+ document_id % Hit::kMaxTermFrequency + 1);
+ Hit doc_hit0(
+ /*section_id=*/0, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
+
+ Hit doc_hit1(
+ /*section_id=*/1, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
+
+ Hit doc_hit2(
+ /*section_id=*/2, /*document_id=*/document_id, term_frequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
+ }
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the lite index.
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ // Get hits for all documents containing "foot" - which should be all of them.
+
+ auto iterator = std::make_unique<DocHitInfoIteratorTermMainExact>(
+ main_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ DocumentId expected_document_id = 2047;
+ while (iterator->Advance().ok()) {
+ EXPECT_THAT(iterator->doc_hit_info(),
+ EqualsDocHitInfo(expected_document_id,
+ std::vector<SectionId>{0, 1, 2}));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+
+ Hit::TermFrequency expected_term_frequency =
+ static_cast<Hit::TermFrequency>(
+ expected_document_id % Hit::kMaxTermFrequency + 1);
+ ASSERT_THAT(matched_terms_stats, SizeIs(1));
+ EXPECT_THAT(matched_terms_stats[0].term, Eq(term));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[0],
+ Eq(expected_term_frequency));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[1],
+ Eq(expected_term_frequency));
+ EXPECT_THAT(matched_terms_stats[0].term_frequencies[2],
+ Eq(expected_term_frequency));
+ --expected_document_id;
+ }
+ EXPECT_THAT(expected_document_id, Eq(-1));
+}
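+
+// A quick check of the modulo arithmetic used in the test above, assuming
+// Hit::kMaxTermFrequency == 255: for document_id 2047 the expected term
+// frequency is 2047 % 255 + 1 = 7 + 1 = 8, reported identically for all three
+// sections of that document.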
+
+TEST_F(MainIndexTest, MergeIndexBackfilling) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"fool" is_in_prefix_section=true}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the index. The main index should contain "fool".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+
+ // 4. Index two docs in a new Lite Index:
+ // - Doc1 {"foot" is_in_prefix_section=false}
+ std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
+ LiteIndex::Options options(lite_index_file_name2,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
+
+ // 5. Merge the index. The main index should now contain "fool", "foot"
+ // and a backfill point for "foo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+  // Look up the exact posting list for "foo". It should be empty: "foo" was
+  // only ever added as a prefix branch point, never as an exact term.
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
+ EXPECT_THAT(hits, IsEmpty());
+
+ // Get hits from backfill posting list.
+ hits = GetPrefixHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
+ // We should get a hit for "fool" in doc0.
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ doc0_hit.document_id(),
+ std::vector<SectionId>{doc0_hit.section_id()})));
+}
+
+TEST_F(MainIndexTest, OneHitInTheFirstPageForTwoPagesMainIndex) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ SectionId section_id = 0;
+ // Based on debugging logs, 2038 documents in the following setting will
+ // result in two pages in the posting list chain, and the first page only
+ // contains one hit.
+ uint32_t num_docs = 2038;
+ for (DocumentId document_id = 0; document_id < num_docs; ++document_id) {
+ Hit doc_hit(section_id, document_id, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
+ }
+
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ std::vector<DocHitInfo> hits =
+ GetExactHits(main_index.get(), /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, "foo");
+ ASSERT_THAT(hits, SizeIs(num_docs));
+ for (DocumentId document_id = num_docs - 1; document_id >= 0; --document_id) {
+ ASSERT_THAT(
+ hits[num_docs - 1 - document_id],
+ EqualsDocHitInfo(document_id, std::vector<SectionId>{section_id}));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/posting-list-hit-accessor.cc b/icing/index/main/posting-list-hit-accessor.cc
new file mode 100644
index 0000000..3d5476b
--- /dev/null
+++ b/icing/index/main/posting-list-hit-accessor.cc
@@ -0,0 +1,123 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/posting-list-hit-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
+PostingListHitAccessor::Create(FlashIndexStorage *storage,
+ PostingListHitSerializer *serializer) {
+ uint32_t max_posting_list_bytes = storage->max_posting_list_bytes();
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
+ return std::unique_ptr<PostingListHitAccessor>(new PostingListHitAccessor(
+ storage, serializer, std::move(in_memory_posting_list)));
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
+PostingListHitAccessor::CreateFromExisting(
+ FlashIndexStorage *storage, PostingListHitSerializer *serializer,
+ PostingListIdentifier existing_posting_list_id) {
+ // Our in_memory_posting_list_ will start as empty.
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ Create(storage, serializer));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage->GetPostingList(existing_posting_list_id));
+ pl_accessor->preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ return pl_accessor;
+}
+
+// Returns the next batch of hits for the provided posting list.
+libtextclassifier3::StatusOr<std::vector<Hit>>
+PostingListHitAccessor::GetNextHitsBatch() {
+ if (preexisting_posting_list_ == nullptr) {
+ if (has_reached_posting_list_chain_end_) {
+ return std::vector<Hit>();
+ }
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve hits from a PostingListHitAccessor that was not "
+ "created from a preexisting posting list.");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<Hit> batch,
+ serializer_->GetHits(&preexisting_posting_list_->posting_list));
+ uint32_t next_block_index = kInvalidBlockIndex;
+ // Posting lists will only be chained when they are max-sized, in which case
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
+ }
+
+ if (next_block_index != kInvalidBlockIndex) {
+    // Since we only have to deal with the next block of a max-sized posting
+    // list, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
+ PostingListIdentifier next_posting_list_id(
+ next_block_index, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/BitsToStore(1));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage_->GetPostingList(next_posting_list_id));
+ preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ } else {
+ has_reached_posting_list_chain_end_ = true;
+ preexisting_posting_list_.reset();
+ }
+ return batch;
+}
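+
+// A minimal usage sketch for the function above, assuming pl_accessor was
+// obtained from CreateFromExisting(); GetNextHitsBatch() returns an empty
+// vector once the end of the chain has been reached:
+//
+//   std::vector<Hit> all_hits;
+//   ICING_ASSIGN_OR_RETURN(std::vector<Hit> batch,
+//                          pl_accessor->GetNextHitsBatch());
+//   while (!batch.empty()) {
+//     all_hits.insert(all_hits.end(), batch.begin(), batch.end());
+//     ICING_ASSIGN_OR_RETURN(batch, pl_accessor->GetNextHitsBatch());
+//   }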
+
+libtextclassifier3::Status PostingListHitAccessor::PrependHit(const Hit &hit) {
+ PostingListUsed &active_pl = (preexisting_posting_list_ != nullptr)
+ ? preexisting_posting_list_->posting_list
+ : in_memory_posting_list_;
+ libtextclassifier3::Status status = serializer_->PrependHit(&active_pl, hit);
+ if (!absl_ports::IsResourceExhausted(status)) {
+ return status;
+ }
+ // There is no more room to add hits to this current posting list! Therefore,
+ // we need to either move those hits to a larger posting list or flush this
+ // posting list and create another max-sized posting list in the chain.
+ if (preexisting_posting_list_ != nullptr) {
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+ } else {
+ ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+ }
+
+ // Re-add hit. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependHit(&in_memory_posting_list_, hit);
+}
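+
+// A minimal sketch of the ordering contract enforced above (hit values shrink
+// as document ids grow, so prepends happen in increasing document order):
+//
+//   Hit older(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency);
+//   Hit newer(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency);
+//   ICING_RETURN_IF_ERROR(accessor->PrependHit(older));
+//   ICING_RETURN_IF_ERROR(accessor->PrependHit(newer));
+//   // Prepending 'older' again now would return INVALID_ARGUMENT.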
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/posting-list-hit-accessor.h b/icing/index/main/posting-list-hit-accessor.h
new file mode 100644
index 0000000..7b72437
--- /dev/null
+++ b/icing/index/main/posting-list-hit-accessor.h
@@ -0,0 +1,101 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_POSTING_LIST_HIT_ACCESSOR_H_
+#define ICING_INDEX_POSTING_LIST_HIT_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+
+namespace icing {
+namespace lib {
+
+// This class provides a simple abstraction for adding hits to posting
+// lists. PostingListHitAccessor handles 1) selection of properly-sized posting
+// lists for the accumulated hits during Finalize() and 2) chaining of max-sized
+// posting lists.
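+//
+// A minimal usage sketch (assuming a valid FlashIndexStorage and
+// PostingListHitSerializer that outlive the accessor):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<PostingListHitAccessor> accessor,
+//       PostingListHitAccessor::Create(storage, serializer));
+//   ICING_RETURN_IF_ERROR(accessor->PrependHit(hit));
+//   PostingListAccessor::FinalizeResult result =
+//       std::move(*accessor).Finalize();
+//   ICING_RETURN_IF_ERROR(result.status);
+//   // result.id now identifies the flushed posting list on disk.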
+class PostingListHitAccessor : public PostingListAccessor {
+ public:
+ // Creates an empty PostingListHitAccessor.
+ //
+ // RETURNS:
+ // - On success, a valid unique_ptr instance of PostingListHitAccessor
+ // - INVALID_ARGUMENT error if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
+ Create(FlashIndexStorage* storage, PostingListHitSerializer* serializer);
+
+  // Creates a PostingListHitAccessor with an existing posting list identified by
+ // existing_posting_list_id.
+ //
+ // The PostingListHitAccessor will add hits to this posting list until it is
+ // necessary either to 1) chain the posting list (if it is max-sized) or 2)
+ // move its hits to a larger posting list.
+ //
+ // RETURNS:
+ // - On success, a valid unique_ptr instance of PostingListHitAccessor
+ // - INVALID_ARGUMENT if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
+ CreateFromExisting(FlashIndexStorage* storage,
+ PostingListHitSerializer* serializer,
+ PostingListIdentifier existing_posting_list_id);
+
+ PostingListSerializer* GetSerializer() override { return serializer_; }
+
+  // Retrieves the next batch of hits for the posting list chain.
+  //
+  // RETURNS:
+  //   - On success, a vector of hits in the posting list chain
+  //   - FAILED_PRECONDITION if called on an instance of
+  //     PostingListHitAccessor that was created via
+  //     PostingListHitAccessor::Create
+  //   - INTERNAL if unable to read the next posting list in the chain or if
+  //     the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<Hit>> GetNextHitsBatch();
+
+ // Prepend one hit. This may result in flushing the posting list to disk (if
+ // the PostingListHitAccessor holds a max-sized posting list that is full) or
+ // freeing a pre-existing posting list if it is too small to fit all hits
+ // necessary.
+ //
+ // RETURNS:
+ // - OK, on success
+ // - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the
+ // previously added hit.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status PrependHit(const Hit& hit);
+
+ private:
+ explicit PostingListHitAccessor(FlashIndexStorage* storage,
+ PostingListHitSerializer* serializer,
+ PostingListUsed in_memory_posting_list)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
+ serializer_(serializer) {}
+
+ PostingListHitSerializer* serializer_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_POSTING_LIST_HIT_ACCESSOR_H_
diff --git a/icing/index/main/posting-list-hit-accessor_test.cc b/icing/index/main/posting-list-hit-accessor_test.cc
new file mode 100644
index 0000000..1127814
--- /dev/null
+++ b/icing/index/main/posting-list-hit-accessor_test.cc
@@ -0,0 +1,366 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/posting-list-hit-accessor.h"
+
+#include <cstdint>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/hit-test-utils.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Lt;
+using ::testing::SizeIs;
+
+class PostingListHitAccessorTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/test_dir";
+ file_name_ = test_dir_ + "/test_file.idx.index";
+
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+ serializer_ = std::make_unique<PostingListHitSerializer>();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+ }
+
+ void TearDown() override {
+ flash_index_storage_.reset();
+ serializer_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string file_name_;
+ std::unique_ptr<PostingListHitSerializer> serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+};
+
+TEST_F(PostingListHitAccessorTest, HitsAddAndRetrieveProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add some hits! Any hits!
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
+ }
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result.status);
+ EXPECT_THAT(result.id.block_index(), Eq(1));
+ EXPECT_THAT(result.id.posting_list_index(), Eq(0));
+
+ // Retrieve some hits.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result.id));
+ EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
+}
+
+TEST_F(PostingListHitAccessorTest, PreexistingPLKeepOnSameBlock) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add a single hit. This will fit in a min-sized posting list.
+ Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency);
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result1.status);
+ // Should have been allocated to the first block.
+ EXPECT_THAT(result1.id.block_index(), Eq(1));
+ EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Add one more hit. The minimum size for a posting list must be able to fit
+ // at least two hits, so this should NOT cause the previous pl to be
+ // reallocated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/1);
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit2));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result2.status);
+ // Should have been allocated to the same posting list as the first hit.
+ EXPECT_THAT(result2.id, Eq(result1.id));
+
+ // The posting list at result2.id should hold all of the hits that have been
+ // added.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAre(hit2, hit1)));
+}
+
+TEST_F(PostingListHitAccessorTest, PreexistingPLReallocateToLargerPL) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // The smallest posting list size is 15 bytes. The first four hits will be
+ // compressed to one byte each and will be able to fit in the 5 byte padded
+ // region. The last hit will fit in one of the special hits. The posting list
+ // will be ALMOST_FULL and can fit at most 2 more hits.
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result1.status);
+ // Should have been allocated to the first block.
+ EXPECT_THAT(result1.id.block_index(), Eq(1));
+ EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Now let's add some more hits!
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ // The current posting list can fit at most 2 more hits. Adding 12 more hits
+ // should result in these hits being moved to a larger posting list.
+ std::vector<Hit> hits2 = CreateHits(
+ /*start_docid=*/hits1.back().document_id() + 1, /*num_hits=*/12,
+ /*desired_byte_length=*/1);
+
+ for (const Hit& hit : hits2) {
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result2.status);
+ // Should have been allocated to the second (new) block because the posting
+ // list should have grown beyond the size that the first block maintains.
+ EXPECT_THAT(result2.id.block_index(), Eq(2));
+ EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+
+ // The posting list at result2.id should hold all of the hits that have been
+ // added.
+ for (const Hit& hit : hits2) {
+ hits1.push_back(hit);
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+}
+
+TEST_F(PostingListHitAccessorTest, MultiBlockChainsBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add some hits! Any hits!
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5000, /*desired_byte_length=*/1);
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result1.status);
+ PostingListIdentifier second_block_id = result1.id;
+ // Should have been allocated to the second block, which holds a max-sized
+ // posting list.
+ EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // Now let's retrieve them!
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_block_id));
+ // This pl_holder will only hold a posting list with the hits that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> second_block_hits,
+ serializer_->GetHits(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_hits, SizeIs(Lt(hits1.size())));
+ auto first_block_hits_start = hits1.rbegin() + second_block_hits.size();
+ EXPECT_THAT(second_block_hits,
+ ElementsAreArray(hits1.rbegin(), first_block_hits_start));
+
+ // Now retrieve all of the hits that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(
+ serializer_->GetHits(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_hits_start, hits1.rend())));
+}
+
+TEST_F(PostingListHitAccessorTest, PreexistingMultiBlockReusesBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add some hits! Any hits!
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5000, /*desired_byte_length=*/1);
+ for (const Hit& hit : hits1) {
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result1.status);
+ PostingListIdentifier first_add_id = result1.id;
+ EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // Now add a couple more hits. These should fit on the existing, not full
+ // second block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), first_add_id));
+ std::vector<Hit> hits2 = CreateHits(
+ /*start_docid=*/hits1.back().document_id() + 1, /*num_hits=*/50,
+ /*desired_byte_length=*/1);
+
+ for (const Hit& hit : hits2) {
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_EXPECT_OK(result2.status);
+ PostingListIdentifier second_add_id = result2.id;
+ EXPECT_THAT(second_add_id, Eq(first_add_id));
+
+ // We should be able to retrieve all 5050 hits.
+ for (const Hit& hit : hits2) {
+ hits1.push_back(hit);
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_add_id));
+ // This pl_holder will only hold a posting list with the hits that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> second_block_hits,
+ serializer_->GetHits(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_hits, SizeIs(Lt(hits1.size())));
+ auto first_block_hits_start = hits1.rbegin() + second_block_hits.size();
+ EXPECT_THAT(second_block_hits,
+ ElementsAreArray(hits1.rbegin(), first_block_hits_start));
+
+ // Now retrieve all of the hits that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(
+ serializer_->GetHits(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_hits_start, hits1.rend())));
+}
+
+TEST_F(PostingListHitAccessorTest, InvalidHitReturnsInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ Hit invalid_hit;
+ EXPECT_THAT(pl_accessor->PrependHit(invalid_hit),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListHitAccessorTest, HitsNotDecreasingReturnsInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
+
+ Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency);
+ EXPECT_THAT(pl_accessor->PrependHit(hit2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency);
+ EXPECT_THAT(pl_accessor->PrependHit(hit3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListHitAccessorTest, NewPostingListNoHitsAdded) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result1.status,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListHitAccessorTest, PreexistingPostingListNoHitsAdded) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor,
+ PostingListHitAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
+ ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListHitAccessor> pl_accessor2,
+ PostingListHitAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor2).Finalize();
+ ICING_ASSERT_OK(result2.status);
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/posting-list-hit-serializer.cc b/icing/index/main/posting-list-hit-serializer.cc
new file mode 100644
index 0000000..e14a0c0
--- /dev/null
+++ b/icing/index/main/posting-list-hit-serializer.cc
@@ -0,0 +1,714 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/posting-list-hit-serializer.h"
+
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+uint32_t GetTermFrequencyByteSize(const Hit& hit) {
+ return hit.has_term_frequency() ? sizeof(Hit::TermFrequency) : 0;
+}
+
+} // namespace
+
+uint32_t PostingListHitSerializer::GetBytesUsed(
+ const PostingListUsed* posting_list_used) const {
+ // The special hits will be included if they represent actual hits. If they
+ // represent the hit offset or the invalid hit sentinel, they are not
+ // included.
+ return posting_list_used->size_in_bytes() -
+ GetStartByteOffset(posting_list_used);
+}
+
+uint32_t PostingListHitSerializer::GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
+ // If in either the FULL state or ALMOST_FULL state, this posting list *is*
+ // the minimum size posting list that can fit these hits. So just return the
+ // size of the posting list.
+ return posting_list_used->size_in_bytes();
+ }
+
+ // In NOT_FULL status BytesUsed contains no special hits. The minimum sized
+ // posting list that would be guaranteed to fit these hits would be
+ // ALMOST_FULL, with kInvalidHit in special_hit(0), the uncompressed Hit in
+ // special_hit(1) and the n compressed hits in the compressed region.
+ // BytesUsed contains one uncompressed Hit and n compressed hits. Therefore,
+ // fitting these hits into a posting list would require BytesUsed plus one
+ // extra hit.
+ return GetBytesUsed(posting_list_used) + sizeof(Hit);
+}
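+
+// A worked example of the NOT_FULL case above, using sizeof(Hit) == 5 as
+// noted elsewhere in this file: if GetBytesUsed() reports 12 bytes (one
+// uncompressed hit plus some compressed hits), the minimum posting list
+// guaranteed to fit them is 12 + sizeof(Hit) = 17 bytes.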
+
+void PostingListHitSerializer::Clear(PostingListUsed* posting_list_used) const {
+ // Safe to ignore return value because posting_list_used->size_in_bytes() is
+ // a valid argument.
+ SetStartByteOffset(posting_list_used,
+ /*offset=*/posting_list_used->size_in_bytes());
+}
+
+libtextclassifier3::Status PostingListHitSerializer::MoveFrom(
+ PostingListUsed* dst, PostingListUsed* src) const {
+ ICING_RETURN_ERROR_IF_NULL(dst);
+ ICING_RETURN_ERROR_IF_NULL(src);
+ if (GetMinPostingListSizeToFit(src) > dst->size_in_bytes()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "src MinPostingListSizeToFit %d must be larger than size %d.",
+ GetMinPostingListSizeToFit(src), dst->size_in_bytes()));
+ }
+
+ if (!IsPostingListValid(dst)) {
+ return absl_ports::FailedPreconditionError(
+ "Dst posting list is in an invalid state and can't be used!");
+ }
+ if (!IsPostingListValid(src)) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot MoveFrom an invalid src posting list!");
+ }
+
+ // Pop just enough hits that all of src's compressed hits fit in
+ // dst posting_list's compressed area. Then we can memcpy that area.
+ std::vector<Hit> hits;
+ while (IsFull(src) || IsAlmostFull(src) ||
+ (dst->size_in_bytes() - kSpecialHitsSize < GetBytesUsed(src))) {
+ if (!GetHitsInternal(src, /*limit=*/1, /*pop=*/true, &hits).ok()) {
+ return absl_ports::AbortedError(
+ "Unable to retrieve hits from src posting list.");
+ }
+ }
+
+ // memcpy the area and set up start byte offset.
+ Clear(dst);
+ memcpy(dst->posting_list_buffer() + dst->size_in_bytes() - GetBytesUsed(src),
+ src->posting_list_buffer() + GetStartByteOffset(src),
+ GetBytesUsed(src));
+  // Because we popped all hits from src outside of the compressed area and
+  // guaranteed that GetBytesUsed(src) is less than dst->size_in_bytes() -
+  // kSpecialHitsSize, this is guaranteed to be a valid byte offset for the
+  // NOT_FULL state, so ignoring the value is safe.
+ SetStartByteOffset(dst, dst->size_in_bytes() - GetBytesUsed(src));
+
+ // Put back remaining hits.
+ for (size_t i = 0; i < hits.size(); i++) {
+ const Hit& hit = hits[hits.size() - i - 1];
+ // PrependHit can return either INVALID_ARGUMENT - if hit is invalid or not
+ // less than the previous hit - or RESOURCE_EXHAUSTED. RESOURCE_EXHAUSTED
+ // should be impossible because we've already assured that there is enough
+ // room above.
+ ICING_RETURN_IF_ERROR(PrependHit(dst, hit));
+ }
+
+ Clear(src);
+ return libtextclassifier3::Status::OK;
+}
+
+uint32_t PostingListHitSerializer::GetPadEnd(
+ const PostingListUsed* posting_list_used, uint32_t offset) const {
+ Hit::Value pad;
+ uint32_t pad_end = offset;
+ while (pad_end < posting_list_used->size_in_bytes()) {
+ size_t pad_len = VarInt::Decode(
+ posting_list_used->posting_list_buffer() + pad_end, &pad);
+ if (pad != 0) {
+ // No longer a pad.
+ break;
+ }
+ pad_end += pad_len;
+ }
+ return pad_end;
+}
+
+bool PostingListHitSerializer::PadToEnd(PostingListUsed* posting_list_used,
+ uint32_t start, uint32_t end) const {
+ if (end > posting_list_used->size_in_bytes()) {
+ ICING_LOG(ERROR) << "Cannot pad a region that ends after size!";
+ return false;
+ }
+ // In VarInt a value of 0 encodes to 0.
+ memset(posting_list_used->posting_list_buffer() + start, 0, end - start);
+ return true;
+}
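+
+// Padding sketch: VarInt encodes the value 0 as a single 0x00 byte, so a
+// padded region is just a run of zero bytes, and GetPadEnd() above walks it
+// by decoding until the first nonzero lead byte. For example:
+//
+//   bytes:  [0x00][0x00][0x00][0x8A 0x01]...  // 3 pad bytes, then a varint
+//   GetPadEnd(posting_list_used, offset) == offset + 3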
+
+libtextclassifier3::Status PostingListHitSerializer::PrependHitToAlmostFull(
+ PostingListUsed* posting_list_used, const Hit& hit) const {
+ // Get delta between first hit and the new hit. Try to fit delta
+ // in the padded area and put new hit at the special position 1.
+ // Calling ValueOrDie is safe here because 1 < kNumSpecialData.
+ Hit cur = GetSpecialHit(posting_list_used, /*index=*/1).ValueOrDie();
+ if (cur.value() <= hit.value()) {
+ return absl_ports::InvalidArgumentError(
+ "Hit being prepended must be strictly less than the most recent Hit");
+ }
+ uint64_t delta = cur.value() - hit.value();
+ uint8_t delta_buf[VarInt::kMaxEncodedLen64];
+ size_t delta_len = VarInt::Encode(delta, delta_buf);
+ uint32_t cur_term_frequency_bytes = GetTermFrequencyByteSize(cur);
+
+ uint32_t pad_end = GetPadEnd(posting_list_used,
+ /*offset=*/kSpecialHitsSize);
+
+ if (pad_end >= kSpecialHitsSize + delta_len + cur_term_frequency_bytes) {
+ // Pad area has enough space for delta and term_frequency of existing hit
+ // (cur). Write delta at pad_end - delta_len - cur_term_frequency_bytes.
+ uint8_t* delta_offset = posting_list_used->posting_list_buffer() + pad_end -
+ delta_len - cur_term_frequency_bytes;
+ memcpy(delta_offset, delta_buf, delta_len);
+ // Now copy term_frequency.
+ Hit::TermFrequency term_frequency = cur.term_frequency();
+ uint8_t* term_frequency_offset = delta_offset + delta_len;
+ memcpy(term_frequency_offset, &term_frequency, cur_term_frequency_bytes);
+
+ // Now first hit is the new hit, at special position 1. Safe to ignore the
+ // return value because 1 < kNumSpecialData.
+ SetSpecialHit(posting_list_used, /*index=*/1, hit);
+ // Safe to ignore the return value because sizeof(Hit) is a valid argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(Hit));
+ } else {
+ // No space for delta. We put the new hit at special position 0
+ // and go to the full state. Safe to ignore the return value because 1 <
+ // kNumSpecialData.
+ SetSpecialHit(posting_list_used, /*index=*/0, hit);
+ }
+ return libtextclassifier3::Status::OK;
+}
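+
+// A worked delta example for the ALMOST_FULL path above (values are
+// illustrative): if the current first hit has value 100 and the new hit has
+// value 97, then delta = 100 - 97 = 3, which VarInt-encodes into a single
+// byte written at the end of the pad area; the new hit then takes over
+// special position 1.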
+
+void PostingListHitSerializer::PrependHitToEmpty(
+ PostingListUsed* posting_list_used, const Hit& hit) const {
+ // First hit to be added. Just add verbatim, no compression.
+ if (posting_list_used->size_in_bytes() == kSpecialHitsSize) {
+ // Safe to ignore the return value because 1 < kNumSpecialData
+ SetSpecialHit(posting_list_used, /*index=*/1, hit);
+ // Safe to ignore the return value because sizeof(Hit) is a valid argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(Hit));
+ } else {
+ // Since this is the first hit, size != kSpecialHitsSize and
+ // size % sizeof(Hit) == 0, we know that there is room to fit 'hit' into
+ // the compressed region, so ValueOrDie is safe.
+ uint32_t offset =
+ PrependHitUncompressed(posting_list_used, hit,
+ /*offset=*/posting_list_used->size_in_bytes())
+ .ValueOrDie();
+ // Safe to ignore the return value because PrependHitUncompressed is
+ // guaranteed to return a valid offset.
+ SetStartByteOffset(posting_list_used, offset);
+ }
+}
+
+libtextclassifier3::Status PostingListHitSerializer::PrependHitToNotFull(
+ PostingListUsed* posting_list_used, const Hit& hit, uint32_t offset) const {
+ // First hit in compressed area. It is uncompressed. See if delta
+ // between the first hit and new hit will still fit in the
+ // compressed area.
+ if (offset + sizeof(Hit::Value) > posting_list_used->size_in_bytes()) {
+ // The first hit in the compressed region *should* be uncompressed, but
+ // somehow there isn't enough room between offset and the end of the
+ // compressed area to fit an uncompressed hit. This should NEVER happen.
+ return absl_ports::FailedPreconditionError(
+ "Posting list is in an invalid state.");
+ }
+ Hit::Value cur_value;
+ memcpy(&cur_value, posting_list_used->posting_list_buffer() + offset,
+ sizeof(Hit::Value));
+ if (cur_value <= hit.value()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Hit %d being prepended must be strictly less than the most recent "
+ "Hit %d",
+ hit.value(), cur_value));
+ }
+ uint64_t delta = cur_value - hit.value();
+ uint8_t delta_buf[VarInt::kMaxEncodedLen64];
+ size_t delta_len = VarInt::Encode(delta, delta_buf);
+ uint32_t hit_term_frequency_bytes = GetTermFrequencyByteSize(hit);
+
+ // offset now points to one past the end of the first hit.
+ offset += sizeof(Hit::Value);
+ if (kSpecialHitsSize + sizeof(Hit::Value) + delta_len +
+ hit_term_frequency_bytes <=
+ offset) {
+ // Enough space for delta in compressed area.
+
+ // Prepend delta.
+ offset -= delta_len;
+ memcpy(posting_list_used->posting_list_buffer() + offset, delta_buf,
+ delta_len);
+
+ // Prepend new hit with (possibly) its term_frequency. We know that there is
+ // room for 'hit' because of the if statement above, so calling ValueOrDie
+ // is safe.
+ offset =
+ PrependHitUncompressed(posting_list_used, hit, offset).ValueOrDie();
+    // offset is guaranteed to be valid here. So it's safe to ignore the return
+    // value. The if above will guarantee that offset >= kSpecialHitsSize and <
+    // posting_list_used->size_in_bytes() because the if ensures that there is
+    // enough room between offset and kSpecialHitsSize to fit the delta of the
+    // previous hit, any term_frequency and the uncompressed hit.
+ SetStartByteOffset(posting_list_used, offset);
+ } else if (kSpecialHitsSize + delta_len <= offset) {
+ // Only have space for delta. The new hit must be put in special
+ // position 1.
+
+ // Prepend delta.
+ offset -= delta_len;
+ memcpy(posting_list_used->posting_list_buffer() + offset, delta_buf,
+ delta_len);
+
+ // Prepend pad. Safe to ignore the return value of PadToEnd because offset
+ // must be less than posting_list_used->size_in_bytes(). Otherwise, this
+ // function already would have returned FAILED_PRECONDITION.
+ PadToEnd(posting_list_used, /*start=*/kSpecialHitsSize,
+ /*end=*/offset);
+
+ // Put new hit in special position 1. Safe to ignore return value because 1
+ // < kNumSpecialData.
+ SetSpecialHit(posting_list_used, /*index=*/1, hit);
+
+ // State almost_full. Safe to ignore the return value because sizeof(Hit) is
+ // a valid argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(Hit));
+ } else {
+ // Very rare case where delta is larger than sizeof(Hit::Value)
+ // (i.e. varint delta encoding expanded required storage). We
+ // move first hit to special position 1 and put new hit in
+ // special position 0.
+ Hit cur(cur_value);
+    // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
+    // Therefore, offset must be less than kSpecialHitsSize + 5. Since posting
+    // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
+    // offset < size_in_bytes.
+ ICING_RETURN_IF_ERROR(
+ ConsumeTermFrequencyIfPresent(posting_list_used, &cur, &offset));
+ // Safe to ignore the return value of PadToEnd because offset must be less
+ // than posting_list_used->size_in_bytes(). Otherwise, this function
+ // already would have returned FAILED_PRECONDITION.
+ PadToEnd(posting_list_used, /*start=*/kSpecialHitsSize,
+ /*end=*/offset);
+ // Safe to ignore the return value here because 0 and 1 < kNumSpecialData.
+ SetSpecialHit(posting_list_used, /*index=*/1, cur);
+ SetSpecialHit(posting_list_used, /*index=*/0, hit);
+ }
+ return libtextclassifier3::Status::OK;
+}
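+
+// A sketch of the three NOT_FULL outcomes implemented above:
+//
+//   room for delta + uncompressed new hit -> stays NOT_FULL
+//   room for delta only                   -> new hit goes to special
+//                                            position 1, becomes ALMOST_FULL
+//   no room even for the delta            -> old first hit moves to special
+//                                            position 1, new hit to special
+//                                            position 0, becomes FULL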
+
+libtextclassifier3::Status PostingListHitSerializer::PrependHit(
+ PostingListUsed* posting_list_used, const Hit& hit) const {
+ static_assert(sizeof(Hit::Value) <= sizeof(uint64_t),
+ "Hit::Value cannot be larger than 8 bytes because the delta "
+ "must be able to fit in 8 bytes.");
+ if (!hit.is_valid()) {
+ return absl_ports::InvalidArgumentError("Cannot prepend an invalid hit!");
+ }
+ if (!IsPostingListValid(posting_list_used)) {
+ return absl_ports::FailedPreconditionError(
+ "This PostingListUsed is in an invalid state and can't add any hits!");
+ }
+
+ if (IsFull(posting_list_used)) {
+ // State full: no space left.
+ return absl_ports::ResourceExhaustedError("No more room for hits");
+ } else if (IsAlmostFull(posting_list_used)) {
+ return PrependHitToAlmostFull(posting_list_used, hit);
+ } else if (IsEmpty(posting_list_used)) {
+ PrependHitToEmpty(posting_list_used, hit);
+ return libtextclassifier3::Status::OK;
+ } else {
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ return PrependHitToNotFull(posting_list_used, hit, offset);
+ }
+}
+
+libtextclassifier3::StatusOr<std::vector<Hit>>
+PostingListHitSerializer::GetHits(
+ const PostingListUsed* posting_list_used) const {
+ std::vector<Hit> hits_out;
+ ICING_RETURN_IF_ERROR(GetHits(posting_list_used, &hits_out));
+ return hits_out;
+}
+
+libtextclassifier3::Status PostingListHitSerializer::GetHits(
+ const PostingListUsed* posting_list_used,
+ std::vector<Hit>* hits_out) const {
+ return GetHitsInternal(posting_list_used,
+ /*limit=*/std::numeric_limits<uint32_t>::max(),
+ /*pop=*/false, hits_out);
+}
+
+libtextclassifier3::Status PostingListHitSerializer::PopFrontHits(
+ PostingListUsed* posting_list_used, uint32_t num_hits) const {
+ if (num_hits == 1 && IsFull(posting_list_used)) {
+    // The PL is in the full state, which means that we store 2 uncompressed
+    // hits in the 2 special positions. But the full state may be reached from
+    // 2 different states.
+ // (1) In "almost full" status
+ // +-----------------+----------------+-------+-----------------+
+ // |Hit::kInvalidVal |1st hit |(pad) |(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+ // When we prepend another hit, we can only put it at the special
+ // position 0. And we get a full PL
+ // +-----------------+----------------+-------+-----------------+
+ // |new 1st hit |original 1st hit|(pad) |(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+ // (2) In "not full" status
+ // +-----------------+----------------+------+-------+------------------+
+ // |hits-start-offset|Hit::kInvalidVal|(pad) |1st hit|(compressed) hits |
+ // +-----------------+----------------+------+-------+------------------+
+ // When we prepend another hit, we can reach any of the 3 following
+ // scenarios:
+ // (2.1) not full
+ // if the space of pad and original 1st hit can accommodate the new 1st hit
+ // and the encoded delta value.
+ // +-----------------+----------------+------+-----------+-----------------+
+ // |hits-start-offset|Hit::kInvalidVal|(pad) |new 1st hit|(compressed) hits|
+ // +-----------------+----------------+------+-----------+-----------------+
+ // (2.2) almost full
+ // If the space of pad and original 1st hit cannot accommodate the new 1st
+ // hit and the encoded delta value but can accommodate the encoded delta
+ // value only. We can put the new 1st hit at special position 1.
+ // +-----------------+----------------+-------+-----------------+
+ // |Hit::kInvalidVal |new 1st hit |(pad) |(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+ // (2.3) full
+    // In a very rare case, it cannot accommodate even the encoded delta
+    // value alone. We can move the original 1st hit into special position 1
+    // and the new 1st hit into special position 0. This may happen because
+    // VarInt encoding stores 7 payload bits per byte, so the encoded value
+    // may be longer than the original (up to 8/7 times as long).
+ // +-----------------+----------------+-------+-----------------+
+ // |new 1st hit |original 1st hit|(pad) |(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+    // Suppose the PL is now full, but we don't know whether it arrived at
+    // this state from "not full" like (2.3) or from "almost full" like (1).
+    // If we simply popped the new 1st hit we would always return to "almost
+    // full" like (1), but we want the prepending operation to be reversible,
+    // so there should be some way to return to "not full" when possible. A
+    // simple way to do it is to pop 2 hits out of the PL (reaching "almost
+    // full" or "not full") and then prepend the original 1st hit back. This
+    // returns us to the correct original state of (2.1) or (1), making the
+    // prepending operation reversible.
+ std::vector<Hit> out;
+
+ // Popping 2 hits should never fail because we've just ensured that the
+ // posting list is in the FULL state.
+ ICING_RETURN_IF_ERROR(
+ GetHitsInternal(posting_list_used, /*limit=*/2, /*pop=*/true, &out));
+
+ // PrependHit should never fail because out[1] is a valid hit less than
+ // previous hits in the posting list and because there's no way that the
+ // posting list could run out of room because it previously stored this hit
+ // AND another hit.
+ ICING_RETURN_IF_ERROR(PrependHit(posting_list_used, out[1]));
+ } else if (num_hits > 0) {
+ return GetHitsInternal(posting_list_used, /*limit=*/num_hits, /*pop=*/true,
+ nullptr);
+ }
+ return libtextclassifier3::Status::OK;
+}
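+
+// A minimal sketch of the reversibility contract described above: prepending
+// a hit and then popping one hit should return the posting list to its
+// original state.
+//
+//   uint32_t bytes_before = GetBytesUsed(pl);
+//   ICING_RETURN_IF_ERROR(PrependHit(pl, hit));
+//   ICING_RETURN_IF_ERROR(PopFrontHits(pl, /*num_hits=*/1));
+//   // GetBytesUsed(pl) == bytes_before and GetHits(pl) is unchanged.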
+
+libtextclassifier3::Status PostingListHitSerializer::GetHitsInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<Hit>* out) const {
+ // Put current uncompressed val here.
+ Hit::Value val = Hit::kInvalidValue;
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ uint32_t count = 0;
+
+ // First traverse the first two special positions.
+ while (count < limit && offset < kSpecialHitsSize) {
+ // Calling ValueOrDie is safe here because offset / sizeof(Hit) <
+ // kNumSpecialData because of the check above.
+ Hit hit = GetSpecialHit(posting_list_used, /*index=*/offset / sizeof(Hit))
+ .ValueOrDie();
+ val = hit.value();
+ if (out != nullptr) {
+ out->push_back(hit);
+ }
+ offset += sizeof(Hit);
+ count++;
+ }
+
+ // If special position 1 was set then we need to skip padding.
+ if (val != Hit::kInvalidValue && offset == kSpecialHitsSize) {
+ offset = GetPadEnd(posting_list_used, offset);
+ }
+
+ while (count < limit && offset < posting_list_used->size_in_bytes()) {
+ if (val == Hit::kInvalidValue) {
+ // First hit is in compressed area. Put that in val.
+ memcpy(&val, posting_list_used->posting_list_buffer() + offset,
+ sizeof(Hit::Value));
+ offset += sizeof(Hit::Value);
+ } else {
+ // Now we have delta encoded subsequent hits. Decode and push.
+ uint64_t delta;
+ offset += VarInt::Decode(
+ posting_list_used->posting_list_buffer() + offset, &delta);
+ val += delta;
+ }
+ Hit hit(val);
+ libtextclassifier3::Status status =
+ ConsumeTermFrequencyIfPresent(posting_list_used, &hit, &offset);
+ if (!status.ok()) {
+ // This posting list has been corrupted somehow. The first hit of the
+ // posting list claims to have a term frequency, but there's no more room
+      // in the posting list for that term frequency to exist. Clear any hits
+      // gathered so far and return an error.
+ if (out != nullptr) {
+ out->clear();
+ }
+ return absl_ports::InternalError("Posting list has been corrupted!");
+ }
+ if (out != nullptr) {
+ out->push_back(hit);
+ }
+ count++;
+ }
+
+ if (pop) {
+ PostingListUsed* mutable_posting_list_used =
+ const_cast<PostingListUsed*>(posting_list_used);
+ // Modify the posting list so that we pop all hits actually
+ // traversed.
+ if (offset >= kSpecialHitsSize &&
+ offset < posting_list_used->size_in_bytes()) {
+ // In the compressed area. Pop and reconstruct. offset/val is
+ // the last traversed hit, which we must discard. So move one
+ // more forward.
+ uint64_t delta;
+ offset += VarInt::Decode(
+ posting_list_used->posting_list_buffer() + offset, &delta);
+ val += delta;
+
+ // Now val is the first hit of the new posting list.
+ if (kSpecialHitsSize + sizeof(Hit::Value) <= offset) {
+ // val fits in compressed area. Simply copy.
+ offset -= sizeof(Hit::Value);
+ memcpy(mutable_posting_list_used->posting_list_buffer() + offset, &val,
+ sizeof(Hit::Value));
+ } else {
+ // val won't fit in compressed area. Also see if there is a
+ // term_frequency.
+ Hit hit(val);
+ libtextclassifier3::Status status =
+ ConsumeTermFrequencyIfPresent(posting_list_used, &hit, &offset);
+ if (!status.ok()) {
+ // This posting list has been corrupted somehow. The first hit of
+ // the posting list claims to have a term frequency, but there's no
+ // more room in the posting list for that term frequency to exist.
+          // Clear any hits gathered so far and return an error. Do not pop
+          // anything.
+ if (out != nullptr) {
+ out->clear();
+ }
+ return absl_ports::InternalError("Posting list has been corrupted!");
+ }
+ // Okay to ignore the return value here because 1 < kNumSpecialData.
+ SetSpecialHit(mutable_posting_list_used, /*index=*/1, hit);
+
+ // Prepend pad. Safe to ignore the return value of PadToEnd because
+ // offset must be less than posting_list_used->size_in_bytes() thanks to
+ // the if above.
+ PadToEnd(mutable_posting_list_used,
+ /*start=*/kSpecialHitsSize,
+ /*end=*/offset);
+ offset = sizeof(Hit);
+ }
+ }
+ // offset is guaranteed to be valid so ignoring the return value of
+    // SetStartByteOffset is safe. It falls into one of four scenarios:
+ // Scenario 1: the above if was false because offset is not <
+ // posting_list_used->size_in_bytes()
+ // In this case, offset must be == posting_list_used->size_in_bytes()
+ // because we reached offset by unwinding hits on the posting list.
+    // Scenario 2: offset is < kSpecialHitsSize
+    //   In this case, offset is guaranteed to be either 0 or sizeof(Hit)
+    //   because offset is incremented by sizeof(Hit) within the first while
+    //   loop.
+    // Scenario 3: offset is within the compressed region and the new first hit
+    //   in the posting list (the value that 'val' holds) will fit as an
+    //   uncompressed hit in the compressed region. The resulting offset from
+    //   decompressing val must be >= kSpecialHitsSize because otherwise we'd
+    //   be in Scenario 4.
+    // Scenario 4: offset is within the compressed region, but the new first
+    //   hit in the posting list is too large to fit as an uncompressed hit in
+    //   the compressed region. Therefore, it must be stored in a special hit
+    //   and offset will be sizeof(Hit).
+ SetStartByteOffset(mutable_posting_list_used, offset);
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Hit> PostingListHitSerializer::GetSpecialHit(
+ const PostingListUsed* posting_list_used, uint32_t index) const {
+ static_assert(sizeof(Hit::Value) >= sizeof(uint32_t), "HitTooSmall");
+  if (index >= kNumSpecialData) {
+ return absl_ports::InvalidArgumentError(
+ "Special hits only exist at indices 0 and 1");
+ }
+ Hit val;
+ memcpy(&val, posting_list_used->posting_list_buffer() + index * sizeof(val),
+ sizeof(val));
+ return val;
+}
+
+bool PostingListHitSerializer::SetSpecialHit(PostingListUsed* posting_list_used,
+ uint32_t index,
+ const Hit& val) const {
+  if (index >= kNumSpecialData) {
+ ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1";
+ return false;
+ }
+ memcpy(posting_list_used->posting_list_buffer() + index * sizeof(val), &val,
+ sizeof(val));
+ return true;
+}
+
+bool PostingListHitSerializer::IsPostingListValid(
+ const PostingListUsed* posting_list_used) const {
+ if (IsAlmostFull(posting_list_used)) {
+ // Special Hit 1 should hold a Hit. Calling ValueOrDie is safe because we
+ // know that 1 < kNumSpecialData.
+ if (!GetSpecialHit(posting_list_used, /*index=*/1)
+ .ValueOrDie()
+ .is_valid()) {
+ ICING_LOG(ERROR)
+ << "Both special hits cannot be invalid at the same time.";
+ return false;
+ }
+ } else if (!IsFull(posting_list_used)) {
+ // NOT_FULL. Special Hit 0 should hold a valid offset. Calling ValueOrDie is
+ // safe because we know that 0 < kNumSpecialData.
+ if (GetSpecialHit(posting_list_used, /*index=*/0).ValueOrDie().value() >
+ posting_list_used->size_in_bytes() ||
+ GetSpecialHit(posting_list_used, /*index=*/0).ValueOrDie().value() <
+ kSpecialHitsSize) {
+ ICING_LOG(ERROR)
+ << "Hit: "
+ << GetSpecialHit(posting_list_used, /*index=*/0).ValueOrDie().value()
+ << " size: " << posting_list_used->size_in_bytes()
+ << " sp size: " << kSpecialHitsSize;
+ return false;
+ }
+ }
+ return true;
+}
+
+uint32_t PostingListHitSerializer::GetStartByteOffset(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used)) {
+ return 0;
+ } else if (IsAlmostFull(posting_list_used)) {
+ return sizeof(Hit);
+ } else {
+ // NOT_FULL, calling ValueOrDie is safe because we know that 0 <
+ // kNumSpecialData.
+ return GetSpecialHit(posting_list_used, /*index=*/0).ValueOrDie().value();
+ }
+}
+
+bool PostingListHitSerializer::SetStartByteOffset(
+ PostingListUsed* posting_list_used, uint32_t offset) const {
+ if (offset > posting_list_used->size_in_bytes()) {
+ ICING_LOG(ERROR) << "offset cannot be a value greater than size "
+ << posting_list_used->size_in_bytes() << ". offset is "
+ << offset << ".";
+ return false;
+ }
+ if (offset < kSpecialHitsSize && offset > sizeof(Hit)) {
+ ICING_LOG(ERROR) << "offset cannot be a value between (" << sizeof(Hit)
+ << ", " << kSpecialHitsSize << "). offset is " << offset
+ << ".";
+ return false;
+ }
+ if (offset < sizeof(Hit) && offset != 0) {
+ ICING_LOG(ERROR) << "offset cannot be a value between (0, " << sizeof(Hit)
+ << "). offset is " << offset << ".";
+ return false;
+ }
+ if (offset >= kSpecialHitsSize) {
+ // not_full state. Safe to ignore the return value because 0 and 1 are both
+ // < kNumSpecialData.
+ SetSpecialHit(posting_list_used, /*index=*/0, Hit(offset));
+ SetSpecialHit(posting_list_used, /*index=*/1, Hit());
+ } else if (offset == sizeof(Hit)) {
+    // almost_full state. Safe to ignore the return value because 0 <
+    // kNumSpecialData.
+ SetSpecialHit(posting_list_used, /*index=*/0, Hit());
+ }
+ // Nothing to do for the FULL state - the offset isn't actually stored
+ // anywhere and both special hits hold valid hits.
+ return true;
+}
+
+libtextclassifier3::StatusOr<uint32_t>
+PostingListHitSerializer::PrependHitUncompressed(
+ PostingListUsed* posting_list_used, const Hit& hit, uint32_t offset) const {
+ if (hit.has_term_frequency()) {
+ if (offset < kSpecialHitsSize + sizeof(Hit)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Not enough room to prepend Hit at offset %d.", offset));
+ }
+ offset -= sizeof(Hit);
+ memcpy(posting_list_used->posting_list_buffer() + offset, &hit,
+ sizeof(Hit));
+ } else {
+ if (offset < kSpecialHitsSize + sizeof(Hit::Value)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Not enough room to prepend Hit::Value at offset %d.", offset));
+ }
+ offset -= sizeof(Hit::Value);
+ Hit::Value val = hit.value();
+ memcpy(posting_list_used->posting_list_buffer() + offset, &val,
+ sizeof(Hit::Value));
+ }
+ return offset;
+}
+
+libtextclassifier3::Status
+PostingListHitSerializer::ConsumeTermFrequencyIfPresent(
+ const PostingListUsed* posting_list_used, Hit* hit,
+ uint32_t* offset) const {
+ if (!hit->has_term_frequency()) {
+ // No term frequency to consume. Everything is fine.
+ return libtextclassifier3::Status::OK;
+ }
+ if (*offset + sizeof(Hit::TermFrequency) >
+ posting_list_used->size_in_bytes()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "offset %d must not point past the end of the posting list of size %d.",
+ *offset, posting_list_used->size_in_bytes()));
+ }
+ Hit::TermFrequency term_frequency;
+ memcpy(&term_frequency, posting_list_used->posting_list_buffer() + *offset,
+ sizeof(Hit::TermFrequency));
+ *hit = Hit(hit->value(), term_frequency);
+ *offset += sizeof(Hit::TermFrequency);
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/posting-list-hit-serializer.h b/icing/index/main/posting-list-hit-serializer.h
new file mode 100644
index 0000000..2986d9c
--- /dev/null
+++ b/icing/index/main/posting-list-hit-serializer.h
@@ -0,0 +1,345 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
+#define ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
+
+#include <cstdint>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/hit/hit.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// A serializer class to serialize hits to PostingListUsed. The layout is
+// described in the comments below.
+class PostingListHitSerializer : public PostingListSerializer {
+ public:
+ static constexpr uint32_t kSpecialHitsSize = kNumSpecialData * sizeof(Hit);
+
+ uint32_t GetDataTypeBytes() const override { return sizeof(Hit); }
+
+ uint32_t GetMinPostingListSize() const override {
+ static constexpr uint32_t kMinPostingListSize = kSpecialHitsSize;
+ static_assert(sizeof(PostingListIndex) <= kMinPostingListSize,
+ "PostingListIndex must be small enough to fit in a "
+ "minimum-sized Posting List.");
+
+ return kMinPostingListSize;
+ }
+
+ uint32_t GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const override;
+
+ uint32_t GetBytesUsed(
+ const PostingListUsed* posting_list_used) const override;
+
+ void Clear(PostingListUsed* posting_list_used) const override;
+
+ libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
+ PostingListUsed* src) const override;
+
+ // Prepend a hit to the posting list.
+ //
+ // RETURNS:
+ // - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the
+ // previously added hit.
+ // - RESOURCE_EXHAUSTED if there is no more room to add hit to the posting
+ // list.
+ libtextclassifier3::Status PrependHit(PostingListUsed* posting_list_used,
+ const Hit& hit) const;
+
+  // Prepend hits to the posting list. Hits should be sorted in descending
+  // order (as defined by the less than operator for Hit).
+ //
+ // Returns the number of hits that could be prepended to the posting list. If
+ // keep_prepended is true, whatever could be prepended is kept, otherwise the
+ // posting list is left in its original state.
+ template <class T, Hit (*GetHit)(const T&)>
+ libtextclassifier3::StatusOr<uint32_t> PrependHitArray(
+ PostingListUsed* posting_list_used, const T* array, uint32_t num_hits,
+ bool keep_prepended) const;
+
+ // Retrieves the hits stored in the posting list.
+ //
+ // RETURNS:
+ // - On success, a vector of hits sorted by the reverse order of prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<Hit>> GetHits(
+ const PostingListUsed* posting_list_used) const;
+
+ // Same as GetHits but appends hits to hits_out.
+ //
+ // RETURNS:
+  //   - OK on success; hits are appended to hits_out in the reverse order
+  //     of prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetHits(const PostingListUsed* posting_list_used,
+ std::vector<Hit>* hits_out) const;
+
+  // Undo the last num_hits hits prepended. If num_hits exceeds the number of
+  // hits on the posting list, all hits are cleared.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status PopFrontHits(PostingListUsed* posting_list_used,
+ uint32_t num_hits) const;
+
+ private:
+ // Posting list layout formats:
+ //
+ // not_full
+ //
+ // +-----------------+----------------+-------+-----------------+
+ // |hits-start-offset|Hit::kInvalidVal|xxxxxxx|(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+ //
+ // almost_full
+ //
+ // +-----------------+----------------+-------+-----------------+
+ // |Hit::kInvalidVal |1st hit |(pad) |(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+ //
+  // full
+ //
+ // +-----------------+----------------+-------+-----------------+
+ // |1st hit |2nd hit |(pad) |(compressed) hits|
+ // +-----------------+----------------+-------+-----------------+
+ //
+ // The first two uncompressed hits also implicitly encode information about
+ // the size of the compressed hits region.
+ //
+ // 1. If the posting list is NOT_FULL, then
+ // posting_list_buffer_[0] contains the byte offset of the start of the
+ // compressed hits - and, thus, the size of the compressed hits region is
+ // size_in_bytes - posting_list_buffer_[0].
+ //
+ // 2. If posting list is ALMOST_FULL or FULL, then the compressed hits region
+ // starts somewhere between [kSpecialHitsSize, kSpecialHitsSize + sizeof(Hit)
+ // - 1] and ends at size_in_bytes - 1.
+ //
+  // Hit term frequencies are stored after the hit value, compressed or
+  // uncompressed. For the first two special hits, there is always space
+  // reserved for the term frequency. For hits in the compressed area, the term
+  // frequency follows the hit value only if hit.has_term_frequency() is true.
+  // This allows good compression in the common case where hits don't have a
+  // valid term frequency.
+ //
+ // EXAMPLE
+ // Posting list storage. Posting list size: 20 bytes
+ // EMPTY!
+ // +--bytes 0-4--+----- 5-9 ------+---------------- 10-19 -----------------+
+ // | 20 |Hit::kInvalidVal| 0x000 |
+  // +-------------+----------------+----------------------------------------+
+ //
+ // Add Hit 0x07FFF998 (DocumentId = 12, SectionId = 3, Flags = 0)
+ // NOT FULL!
+ // +--bytes 0-4--+----- 5-9 ------+----- 10-15 -----+-------- 16-19 -------+
+ // | 16 |Hit::kInvalidVal| 0x000 | 0x07FFF998 |
+ // +-------------+----------------+-----------------+----------------------+
+ //
+ // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4,
+ // TermFrequency=125)
+ // (Hit 0x07FFF998 - Hit 0x07FFF684 = 788)
+ // +--bytes 0-4--+----- 5-9 ------+-- 10-12 --+-- 13-16 --+- 17 -+-- 18-19 --+
+ // | 13 |Hit::kInvalidVal| 0x000 | 0x07FFF684| 125 | 788 |
+ // +-------------+----------------+-----------+-----------+------+-----------+
+ //
+ // Add Hit 0x07FFF4D2 (DocumentId = 22, SectionId = 10, Flags = 2)
+ // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 434)
+ // +--bytes 0-4--+--- 5-9 ----+-- 10 --+-- 11-14 -+- 15-16 -+- 17 -+- 18-19 -+
+ // | 9 |Hit::kInvVal| 0x00 |0x07FFF4D2| 434 | 125 | 788 |
+ // +-------------+------------+--------+----------+---------+------+---------+
+ //
+ // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6,
+ // TermFrequency = 87)
+  // (Hit 0x07FFF4D2 - Hit 0x07FFF40E = 196)
+  // ALMOST FULL!
+  // +--bytes 0-4-+---- 5-9 ----+- 10-12 -+- 13-14 -+- 15-16 -+- 17 -+- 18-19 -+
+  // |Hit::kInvVal|0x07FFF40E,87|  0x000  |   196   |   434   | 125  |   788   |
+  // +------------+-------------+---------+---------+---------+------+---------+
+ //
+ // Add Hit 0x07FFF320 (DocumentId = 27, SectionId = 4, Flags = 0)
+ // FULL!
+  // +--bytes 0-4--+---- 5-9 ----+- 10-12 -+- 13-14 -+- 15-16 -+- 17 -+- 18-19 -+
+  // | 0x07FFF320  |0x07FFF40E,87|  0x000  |   196   |   434   | 125  |   788   |
+  // +-------------+-------------+---------+---------+---------+------+---------+
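+
+  // Illustrative sketch of how a compressed delta is produced when prepending
+  // (VarInt::Encode and VarInt::kMaxEncodedLen64 are assumed counterparts of
+  // the VarInt::Decode helper this serializer uses; treat them as
+  // placeholders). Hits are prepended in decreasing order, so the stored
+  // delta is the previously prepended (larger) hit value minus the new one:
+  //
+  //   uint64_t delta = prev_hit.value() - new_hit.value();
+  //   uint8_t buf[VarInt::kMaxEncodedLen64];
+  //   size_t len = VarInt::Encode(delta, buf);  // len bytes go into the pl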
+
+ // Helpers to determine what state the posting list is in.
+ bool IsFull(const PostingListUsed* posting_list_used) const {
+ return GetSpecialHit(posting_list_used, /*index=*/0)
+ .ValueOrDie()
+ .is_valid() &&
+ GetSpecialHit(posting_list_used, /*index=*/1)
+ .ValueOrDie()
+ .is_valid();
+ }
+
+ bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
+ return !GetSpecialHit(posting_list_used, /*index=*/0)
+ .ValueOrDie()
+ .is_valid();
+ }
+
+ bool IsEmpty(const PostingListUsed* posting_list_used) const {
+ return GetSpecialHit(posting_list_used, /*index=*/0).ValueOrDie().value() ==
+ posting_list_used->size_in_bytes() &&
+ !GetSpecialHit(posting_list_used, /*index=*/1)
+ .ValueOrDie()
+ .is_valid();
+ }
+
+  // Returns false if both special hits are invalid or if the offset value
+  // stored in the special hit is less than kSpecialHitsSize or greater than
+  // posting_list_used->size_in_bytes(). Returns true otherwise.
+ bool IsPostingListValid(const PostingListUsed* posting_list_used) const;
+
+ // Prepend hit to a posting list that is in the ALMOST_FULL state.
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if hit is not less than the previously added hit.
+ libtextclassifier3::Status PrependHitToAlmostFull(
+ PostingListUsed* posting_list_used, const Hit& hit) const;
+
+ // Prepend hit to a posting list that is in the EMPTY state. This will always
+ // succeed because there are no pre-existing hits and no validly constructed
+ // posting list could fail to fit one hit.
+ void PrependHitToEmpty(PostingListUsed* posting_list_used,
+ const Hit& hit) const;
+
+ // Prepend hit to a posting list that is in the NOT_FULL state.
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if hit is not less than the previously added hit.
+ libtextclassifier3::Status PrependHitToNotFull(
+ PostingListUsed* posting_list_used, const Hit& hit,
+ uint32_t offset) const;
+
+ // Returns either 0 (full state), sizeof(Hit) (almost_full state) or
+ // a byte offset between kSpecialHitsSize and
+ // posting_list_used->size_in_bytes() (inclusive) (not_full state).
+ uint32_t GetStartByteOffset(const PostingListUsed* posting_list_used) const;
+
+ // Sets the special hits to properly reflect what offset is (see layout
+ // comment for further details).
+ //
+  // Returns false if offset > posting_list_used->size_in_bytes(), or if offset
+  // falls strictly between sizeof(Hit) and kSpecialHitsSize, or strictly
+  // between 0 and sizeof(Hit). Returns true otherwise.
+ bool SetStartByteOffset(PostingListUsed* posting_list_used,
+ uint32_t offset) const;
+
+ // Manipulate padded areas. We never store the same hit value twice
+ // so a delta of 0 is a pad byte.
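+  //
+  // Since VarInt encodes a zero delta as a single 0x00 byte, a sketch of the
+  // pad scan (an illustration of the intent, not the exact loop) is:
+  //
+  //   while (offset < size_in_bytes && buffer[offset] == 0) { ++offset; }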
+
+ // Returns offset of first non-pad byte.
+ uint32_t GetPadEnd(const PostingListUsed* posting_list_used,
+ uint32_t offset) const;
+
+  // Fills the padding between offsets start and end with 0s.
+  // Returns false if end > posting_list_used->size_in_bytes(); returns true
+  // otherwise.
+ bool PadToEnd(PostingListUsed* posting_list_used, uint32_t start,
+ uint32_t end) const;
+
+  // Helper for GetHits/PopFrontHits. Adds at most limit hits to out, or all
+  // hits in the posting list if it contains fewer than limit hits. out may be
+  // nullptr.
+ //
+ // NOTE: If called with limit=1, pop=true on a posting list that transitioned
+ // from NOT_FULL directly to FULL, GetHitsInternal will not return the posting
+ // list to NOT_FULL. Instead it will leave it in a valid state, but it will be
+ // ALMOST_FULL.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetHitsInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<Hit>* out) const;
+
+ // Retrieves the value stored in the index-th special hit.
+ //
+ // RETURNS:
+ // - A valid Hit, on success
+ // - INVALID_ARGUMENT if index is not less than kNumSpecialData
+ libtextclassifier3::StatusOr<Hit> GetSpecialHit(
+ const PostingListUsed* posting_list_used, uint32_t index) const;
+
+  // Sets the value stored in the index-th special hit to val. Returns false
+  // and has no effect if index is not less than kNumSpecialData.
+ bool SetSpecialHit(PostingListUsed* posting_list_used, uint32_t index,
+ const Hit& val) const;
+
+ // Prepends hit to the memory region [offset - sizeof(Hit), offset] and
+ // returns the new beginning of the padded region.
+ //
+ // RETURNS:
+ // - The new beginning of the padded region, if successful.
+ // - INVALID_ARGUMENT if hit will not fit (uncompressed) between offset and
+ // kSpecialHitsSize
+ libtextclassifier3::StatusOr<uint32_t> PrependHitUncompressed(
+ PostingListUsed* posting_list_used, const Hit& hit,
+ uint32_t offset) const;
+
+ // If hit has a term frequency, consumes the term frequency at offset, updates
+ // hit to include the term frequency and updates offset to reflect that the
+ // term frequency has been consumed.
+ //
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if hit has a term frequency and offset +
+  //     sizeof(Hit::TermFrequency) > posting_list_used->size_in_bytes()
+ libtextclassifier3::Status ConsumeTermFrequencyIfPresent(
+ const PostingListUsed* posting_list_used, Hit* hit,
+ uint32_t* offset) const;
+};
+
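+// Example usage (a sketch mirroring the accompanying unit tests, not a
+// required calling convention):
+//
+//   PostingListHitSerializer serializer;
+//   ICING_ASSIGN_OR_RETURN(
+//       PostingListUsed pl_used,
+//       PostingListUsed::CreateFromUnitializedRegion(
+//           &serializer, serializer.GetMinPostingListSize()));
+//   Hit hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency);
+//   ICING_RETURN_IF_ERROR(serializer.PrependHit(&pl_used, hit));
+//   ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits,
+//                          serializer.GetHits(&pl_used));
+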
+// Inlined functions. Implementation details below. Avert eyes!
+template <class T, Hit (*GetHit)(const T&)>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListHitSerializer::PrependHitArray(PostingListUsed* posting_list_used,
+ const T* array, uint32_t num_hits,
+ bool keep_prepended) const {
+ if (!IsPostingListValid(posting_list_used)) {
+ return 0;
+ }
+
+ // Prepend hits working backwards from array[num_hits - 1].
+ uint32_t i;
+ for (i = 0; i < num_hits; ++i) {
+ if (!PrependHit(posting_list_used, GetHit(array[num_hits - i - 1])).ok()) {
+ break;
+ }
+ }
+ if (i != num_hits && !keep_prepended) {
+ // Didn't fit. Undo everything and check that we have the same offset as
+ // before. PopFrontHits guarantees that it will remove all 'i' hits so long
+ // as there are at least 'i' hits in the posting list, which we know there
+ // are.
+ ICING_RETURN_IF_ERROR(PopFrontHits(posting_list_used, /*num_hits=*/i));
+ }
+ return i;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
diff --git a/icing/index/main/posting-list-hit-serializer_test.cc b/icing/index/main/posting-list-hit-serializer_test.cc
new file mode 100644
index 0000000..7f0b945
--- /dev/null
+++ b/icing/index/main/posting-list-hit-serializer_test.cc
@@ -0,0 +1,731 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/posting-list-hit-serializer.h"
+
+#include <cstdint>
+#include <deque>
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/hit-test-utils.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::Eq;
+using testing::IsEmpty;
+using testing::Le;
+using testing::Lt;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+struct HitElt {
+ HitElt() = default;
+ explicit HitElt(const Hit &hit_in) : hit(hit_in) {}
+
+ static Hit get_hit(const HitElt &hit_elt) { return hit_elt.hit; }
+
+ Hit hit;
+};
+
+TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
+ PostingListHitSerializer serializer;
+
+ static const int kNumHits = 2551;
+ static const size_t kHitsSize = kNumHits * sizeof(Hit);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
+
+ // Make used.
+ Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0));
+ // Size = sizeof(uncompressed hit0)
+ int expected_size = sizeof(Hit);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
+
+ Hit hit1(/*section_id=*/0, 1, Hit::kDefaultTermFrequency);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1));
+ // Size = sizeof(uncompressed hit1)
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2 + sizeof(Hit::TermFrequency);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit1, hit0)));
+
+ Hit hit2(/*section_id=*/0, 2, /*term_frequency=*/56);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2));
+ // Size = sizeof(uncompressed hit2)
+ // + sizeof(hit1-hit2)
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2;
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
+
+ Hit hit3(/*section_id=*/0, 3, Hit::kDefaultTermFrequency);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3));
+ // Size = sizeof(uncompressed hit3)
+ // + sizeof(hit2-hit3) + sizeof(hit2::term_frequency)
+ // + sizeof(hit1-hit2)
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2 + sizeof(Hit::TermFrequency);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
+}
+
+TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
+ PostingListHitSerializer serializer;
+
+ int size = 2 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // Fill up the compressed region.
+ // Transitions:
+ // Adding hit0: EMPTY -> NOT_FULL
+ // Adding hit1: NOT_FULL -> NOT_FULL
+ // Adding hit2: NOT_FULL -> NOT_FULL
+ Hit hit0(/*section_id=*/0, 0, Hit::kDefaultTermFrequency);
+ Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
+ Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
+ // Size used will be 2+2+4=8 bytes
+ int expected_size = sizeof(Hit::Value) + 2 + 2;
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
+
+ // Add one more hit to transition NOT_FULL -> ALMOST_FULL
+ Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3);
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
+ // Compressed region would be 2+2+3+4=11 bytes, but the compressed region is
+ // only 10 bytes. So instead, the posting list will transition to ALMOST_FULL.
+ // The in-use compressed region will actually shrink from 8 bytes to 7 bytes
+ // because the uncompressed version of hit2 will be overwritten with the
+ // compressed delta of hit2. hit3 will be written to one of the special hits.
+ // Because we're in ALMOST_FULL, the expected size is the size of the pl minus
+ // the one hit used to mark the posting list as ALMOST_FULL.
+ expected_size = size - sizeof(Hit);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
+
+ // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
+ Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4));
+ // There are currently 7 bytes in use in the compressed region. hit3 will have
+ // a 2-byte delta. That delta will fit in the compressed region (which will
+ // now have 9 bytes in use), hit4 will be placed in one of the special hits
+ // and the posting list will remain in ALMOST_FULL.
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0)));
+
+ // Add one more hit to transition ALMOST_FULL -> FULL
+ Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5));
+ // There are currently 9 bytes in use in the compressed region. hit4 will have
+ // a 2-byte delta which will not fit in the compressed region. So hit4 will
+ // remain in one of the special hits and hit5 will occupy the other, making
+ // the posting list FULL.
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)));
+
+ // The posting list is FULL. Adding another hit should fail.
+ Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1);
+ EXPECT_THAT(serializer.PrependHit(&pl_used, hit6),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListHitSerializerTest, PostingListUsedMinSize) {
+ PostingListHitSerializer serializer;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ &serializer, serializer.GetMinPostingListSize()));
+ // PL State: EMPTY
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+ EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
+
+ // Add a hit, PL should shift to ALMOST_FULL state
+ Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/false,
+ /*is_prefix_hit=*/true);
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
+ // Size = sizeof(uncompressed hit0)
+ int expected_size = sizeof(Hit);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
+
+ // Add the smallest hit possible - no term_frequency and a delta of 1. PL
+ // should shift to FULL state.
+ Hit hit1(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/true,
+ /*is_prefix_hit=*/false);
+ ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
+ // Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
+ expected_size += sizeof(Hit);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit1, hit0)));
+
+ // Try to add the smallest hit possible. Should fail
+ Hit hit2(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/false,
+ /*is_prefix_hit=*/false);
+ EXPECT_THAT(serializer.PrependHit(&pl_used, hit2),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAre(hit1, hit0)));
+}
+
+TEST(PostingListHitSerializerTest,
+ PostingListPrependHitArrayMinSizePostingList) {
+ PostingListHitSerializer serializer;
+
+ // Min Size = 10
+ int size = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<HitElt> hits_in;
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ std::reverse(hits_in.begin(), hits_in.end());
+
+  // Add five hits. The PL is in the empty state and an empty min-size PL can
+  // only fit two hits, so PrependHitArray should report that only two hits
+  // could be prepended.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t num_can_prepend,
+ (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
+ EXPECT_THAT(num_can_prepend, Eq(2));
+
+ int can_fit_hits = num_can_prepend;
+ // The PL has room for 2 hits. We should be able to add them without any
+ // problem, transitioning the PL from EMPTY -> ALMOST_FULL -> FULL
+ const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_can_prepend, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, hits_in_ptr, can_fit_hits, false)));
+ EXPECT_THAT(num_can_prepend, Eq(can_fit_hits));
+ EXPECT_THAT(size, Eq(serializer.GetBytesUsed(&pl_used)));
+ std::deque<Hit> hits_pushed;
+ std::transform(hits_in.rbegin(),
+ hits_in.rend() - hits_in.size() + can_fit_hits,
+ std::front_inserter(hits_pushed), HitElt::get_hit);
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAreArray(hits_pushed)));
+}
+
+TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
+ PostingListHitSerializer serializer;
+
+ // Size = 30
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<HitElt> hits_in;
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ std::reverse(hits_in.begin(), hits_in.end());
+ // The last hit is uncompressed and the four before it should only take one
+ // byte. Total use = 8 bytes.
+ // ----------------------
+ // 29 delta(Hit #1)
+ // 28 delta(Hit #2)
+ // 27 delta(Hit #3)
+ // 26 delta(Hit #4)
+ // 25-22 Hit #5
+ // 21-10 <unused>
+ // 9-5 kSpecialHit
+ // 4-0 Offset=22
+ // ----------------------
+ int byte_size = sizeof(Hit::Value) + hits_in.size() - 1;
+
+ // Add five hits. The PL is in the empty state and should be able to fit all
+ // five hits without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t num_could_fit,
+ (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
+ std::deque<Hit> hits_pushed;
+ std::transform(hits_in.rbegin(), hits_in.rend(),
+ std::front_inserter(hits_pushed), HitElt::get_hit);
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+ Hit first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
+ hits_in.clear();
+ hits_in.emplace_back(first_hit);
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
+ std::reverse(hits_in.begin(), hits_in.end());
+ // Size increased by the deltas of these hits (1+2+1+2+3+2) = 11 bytes
+ // ----------------------
+ // 29 delta(Hit #1)
+ // 28 delta(Hit #2)
+ // 27 delta(Hit #3)
+ // 26 delta(Hit #4)
+ // 25 delta(Hit #5)
+ // 24-23 delta(Hit #6)
+ // 22 delta(Hit #7)
+ // 21-20 delta(Hit #8)
+ // 19-17 delta(Hit #9)
+ // 16-15 delta(Hit #10)
+ // 14-11 Hit #11
+ // 10 <unused>
+ // 9-5 kSpecialHit
+ // 4-0 Offset=11
+ // ----------------------
+ byte_size += 11;
+
+ // Add these 6 hits. The PL is currently in the NOT_FULL state and should
+ // remain in the NOT_FULL state.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
+ // All hits from hits_in were added.
+ std::transform(hits_in.rbegin(), hits_in.rend(),
+ std::front_inserter(hits_pushed), HitElt::get_hit);
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+ first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/3);
+ hits_in.clear();
+ hits_in.emplace_back(first_hit);
+ // ----------------------
+ // 29 delta(Hit #1)
+ // 28 delta(Hit #2)
+ // 27 delta(Hit #3)
+ // 26 delta(Hit #4)
+ // 25 delta(Hit #5)
+ // 24-23 delta(Hit #6)
+ // 22 delta(Hit #7)
+ // 21-20 delta(Hit #8)
+ // 19-17 delta(Hit #9)
+ // 16-15 delta(Hit #10)
+ // 14-12 delta(Hit #11)
+ // 11-10 <unused>
+ // 9-5 Hit #12
+ // 4-0 kSpecialHit
+ // ----------------------
+ byte_size = 25;
+
+ // Add this 1 hit. The PL is currently in the NOT_FULL state and should
+ // transition to the ALMOST_FULL state - even though there is still some
+ // unused space.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
+ // All hits from hits_in were added.
+ std::transform(hits_in.rbegin(), hits_in.rend(),
+ std::front_inserter(hits_pushed), HitElt::get_hit);
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+ first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
+ hits_in.clear();
+ hits_in.emplace_back(first_hit);
+ hits_in.emplace_back(
+ CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
+ std::reverse(hits_in.begin(), hits_in.end());
+ // ----------------------
+ // 29 delta(Hit #1)
+ // 28 delta(Hit #2)
+ // 27 delta(Hit #3)
+ // 26 delta(Hit #4)
+ // 25 delta(Hit #5)
+ // 24-23 delta(Hit #6)
+ // 22 delta(Hit #7)
+ // 21-20 delta(Hit #8)
+ // 19-17 delta(Hit #9)
+ // 16-15 delta(Hit #10)
+ // 14-12 delta(Hit #11)
+ // 11 delta(Hit #12)
+ // 10 <unused>
+ // 9-5 Hit #13
+ // 4-0 Hit #14
+ // ----------------------
+
+ // Add these 2 hits. The PL is currently in the ALMOST_FULL state. Adding the
+ // first hit should keep the PL in ALMOST_FULL because the delta between Hit
+ // #12 and Hit #13 (1 byte) can fit in the unused area (2 bytes). Adding the
+  // second hit should transition to the FULL state because the delta between
+ // Hit #13 and Hit #14 (2 bytes) is larger than the remaining unused area
+ // (1 byte).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hits_in[0], hits_in.size(), false)));
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(size, Eq(serializer.GetBytesUsed(&pl_used)));
+ // All hits from hits_in were added.
+ std::transform(hits_in.rbegin(), hits_in.rend(),
+ std::front_inserter(hits_pushed), HitElt::get_hit);
+ EXPECT_THAT(serializer.GetHits(&pl_used),
+ IsOkAndHolds(ElementsAreArray(hits_pushed)));
+}
+
+TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
+ PostingListHitSerializer serializer;
+
+ static constexpr int kNumHits = 128;
+ static constexpr int kDeltaSize = 1;
+ static constexpr int kTermFrequencySize = 1;
+ static constexpr size_t kHitsSize =
+ ((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5;
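+  // i.e. 128 * (1 + 1) = 256, rounded down to a multiple of 5 = 255 bytes.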
+
+ // Create an array with one too many hits
+ std::vector<Hit> hits_in_too_many =
+ CreateHits(kNumHits + 1, /*desired_byte_length=*/1);
+ std::vector<HitElt> hit_elts_in_too_many;
+ for (const Hit &hit : hits_in_too_many) {
+ hit_elts_in_too_many.emplace_back(hit);
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ &serializer, serializer.GetMinPostingListSize()));
+
+  // PrependHitArray should not be able to fit all of hit_elts_in_too_many;
+  // it is far too large for the minimum size pl.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t num_could_fit,
+ (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(),
+ false)));
+ ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
+ ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+ ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
+  // PrependHitArray should not be able to fit all of hit_elts_in_too_many;
+  // it is one hit too large for this pl.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+ &pl_used, &hit_elts_in_too_many[0],
+ hit_elts_in_too_many.size(), false)));
+ ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
+ ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+ ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListHitSerializerTest,
+ PostingListStatusJumpFromNotFullToFullAndBack) {
+ PostingListHitSerializer serializer;
+
+ const uint32_t pl_size = 3 * sizeof(Hit);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+ ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue - 1, 0)));
+ uint32_t bytes_used = serializer.GetBytesUsed(&pl);
+ // Status not full.
+ ASSERT_THAT(bytes_used,
+ Le(pl_size - PostingListHitSerializer::kSpecialHitsSize));
+ ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue >> 2, 0)));
+ // Status should jump to full directly.
+ ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(pl_size));
+ ICING_ASSERT_OK(serializer.PopFrontHits(&pl, 1));
+ // Status should return to not full as before.
+ ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(bytes_used));
+}
+
+TEST(PostingListHitSerializerTest, DeltaOverflow) {
+ PostingListHitSerializer serializer;
+
+ const uint32_t pl_size = 4 * sizeof(Hit);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+
+ static const Hit::Value kOverflow[4] = {
+ Hit::kInvalidValue >> 2,
+ (Hit::kInvalidValue >> 2) * 2,
+ (Hit::kInvalidValue >> 2) * 3,
+ Hit::kInvalidValue - 1,
+ };
+
+ // Fit at least 4 ordinary values.
+ for (Hit::Value v = 0; v < 4; v++) {
+ ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(4 - v)));
+ }
+
+ // Cannot fit 4 overflow values.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+ ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[3])));
+ ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[2])));
+
+ // Can fit only one more.
+ ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[1])));
+ EXPECT_THAT(serializer.PrependHit(&pl, Hit(kOverflow[0])),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListHitSerializerTest, MoveFrom) {
+ PostingListHitSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
+ }
+
+ ICING_ASSERT_OK(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1));
+ EXPECT_THAT(serializer.GetHits(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+ EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListHitSerializerTest, MoveFromNullArgumentReturnsInvalidArgument) {
+ PostingListHitSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
+ }
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used1, /*src=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(serializer.GetHits(&pl_used1),
+ IsOkAndHolds(ElementsAreArray(hits.rbegin(), hits.rend())));
+}
+
+TEST(PostingListHitSerializerTest,
+ MoveFromInvalidPostingListReturnsInvalidArgument) {
+ PostingListHitSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
+ }
+
+ // Write invalid hits to the beginning of pl_used1 to make it invalid.
+ Hit invalid_hit;
+ Hit *first_hit = reinterpret_cast<Hit *>(pl_used1.posting_list_buffer());
+ *first_hit = invalid_hit;
+ ++first_hit;
+ *first_hit = invalid_hit;
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(serializer.GetHits(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+}
+
+TEST(PostingListHitSerializerTest,
+ MoveToInvalidPostingListReturnsFailedPrecondition) {
+ PostingListHitSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
+ }
+
+ // Write invalid hits to the beginning of pl_used2 to make it invalid.
+ Hit invalid_hit;
+ Hit *first_hit = reinterpret_cast<Hit *>(pl_used2.posting_list_buffer());
+ *first_hit = invalid_hit;
+ ++first_hit;
+ *first_hit = invalid_hit;
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(serializer.GetHits(&pl_used1),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+}
+
+TEST(PostingListHitSerializerTest, MoveToPostingListTooSmall) {
+ PostingListHitSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(
+ &serializer, serializer.GetMinPostingListSize()));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
+ }
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(serializer.GetHits(&pl_used1),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+ EXPECT_THAT(serializer.GetHits(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+}
+
+TEST(PostingListHitSerializerTest, PopHitsWithScores) {
+ PostingListHitSerializer serializer;
+
+ int size = 2 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // This posting list is 20 bytes. Create four hits that will have deltas of
+  // two bytes each, all of which will have a non-default score. This posting
+  // list will be almost_full.
+ //
+ // ----------------------
+ // 19 score(Hit #0)
+ // 18-17 delta(Hit #0)
+ // 16 score(Hit #1)
+ // 15-14 delta(Hit #1)
+ // 13 score(Hit #2)
+ // 12-11 delta(Hit #2)
+ // 10 <unused>
+ // 9-5 Hit #3
+ // 4-0 kInvalidHitVal
+ // ----------------------
+ Hit hit0(/*section_id=*/0, /*document_id=*/0, /*score=*/5);
+ Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
+ Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
+ Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2);
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0));
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1));
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2));
+ ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> hits_out,
+ serializer.GetHits(&pl_used));
+ EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0));
+
+ // Now, pop the last hit. The posting list should contain the first three
+ // hits.
+ //
+ // ----------------------
+ // 19 score(Hit #0)
+ // 18-17 delta(Hit #0)
+ // 16 score(Hit #1)
+ // 15-14 delta(Hit #1)
+ // 13-10 <unused>
+ // 9-5 Hit #2
+ // 4-0 kInvalidHitVal
+ // ----------------------
+ ICING_ASSERT_OK(serializer.PopFrontHits(&pl_used, 1));
+ ICING_ASSERT_OK_AND_ASSIGN(hits_out, serializer.GetHits(&pl_used));
+ EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/doc-hit-info-iterator-numeric.h b/icing/index/numeric/doc-hit-info-iterator-numeric.h
new file mode 100644
index 0000000..7cdb230
--- /dev/null
+++ b/icing/index/numeric/doc-hit-info-iterator-numeric.h
@@ -0,0 +1,85 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_DOC_HIT_INFO_ITERATOR_NUMERIC_H_
+#define ICING_INDEX_NUMERIC_DOC_HIT_INFO_ITERATOR_NUMERIC_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class DocHitInfoIteratorNumeric : public DocHitInfoLeafIterator {
+ public:
+ explicit DocHitInfoIteratorNumeric(
+ std::unique_ptr<typename NumericIndex<T>::Iterator> numeric_index_iter)
+ : numeric_index_iter_(std::move(numeric_index_iter)) {}
+
+ libtextclassifier3::Status Advance() override {
+ // If the query property path doesn't exist (i.e. the storage doesn't
+ // exist), then numeric_index_iter_ will be nullptr.
+ if (numeric_index_iter_ == nullptr) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ ICING_RETURN_IF_ERROR(numeric_index_iter_->Advance());
+
+ doc_hit_info_ = numeric_index_iter_->GetDocHitInfo();
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
+    return absl_ports::InvalidArgumentError(
+        "Cannot generate suggestion if the last term is a numeric operator.");
+ }
+
+ CallStats GetCallStats() const override {
+ if (numeric_index_iter_ == nullptr) {
+ return CallStats();
+ }
+
+ return CallStats(/*num_leaf_advance_calls_lite_index_in=*/0,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/
+ numeric_index_iter_->GetNumAdvanceCalls(),
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/
+ numeric_index_iter_->GetNumBlocksInspected());
+ }
+
+ std::string ToString() const override { return "test"; }
+
+ void PopulateMatchedTermsStats(
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ // For numeric hit iterator, this should do nothing since there is no term.
+ }
+
+ private:
+ std::unique_ptr<typename NumericIndex<T>::Iterator> numeric_index_iter_;
+};
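+
+// Usage sketch (assumes the doc_hit_info() accessor inherited from
+// DocHitInfoIterator; adapt to the actual base-class API):
+//
+//   while (iterator->Advance().ok()) {
+//     const DocHitInfo& info = iterator->doc_hit_info();
+//     // ... consume info ...
+//   }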
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_DOC_HIT_INFO_ITERATOR_NUMERIC_H_
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
new file mode 100644
index 0000000..d18f2aa
--- /dev/null
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -0,0 +1,351 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_DUMMY_NUMERIC_INDEX_H_
+#define ICING_INDEX_NUMERIC_DUMMY_NUMERIC_INDEX_H_
+
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <memory>
+#include <queue>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// DummyNumericIndex: dummy class to help with testing and unblock e2e
+// integration for numeric search. It stores all numeric index data (keys and
+// hits) in memory, without any actual persistent storage. PersistentStorage
+// features are effectively no-ops: nothing is persisted to disk, so all data
+// is volatile.
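+//
+// Indexing usage sketch (the property path and ids are made-up examples;
+// IndexAllBufferedKeys is rvalue-qualified, so the editor is consumed):
+//
+//   std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+//       index->Edit("price", /*document_id=*/0, /*section_id=*/0);
+//   ICING_RETURN_IF_ERROR(editor->BufferKey(42));
+//   ICING_RETURN_IF_ERROR(std::move(*editor).IndexAllBufferedKeys());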
+template <typename T>
+class DummyNumericIndex : public NumericIndex<T> {
+ public:
+ static libtextclassifier3::StatusOr<std::unique_ptr<DummyNumericIndex<T>>>
+ Create(const Filesystem& filesystem, std::string working_path) {
+ auto dummy_numeric_index = std::unique_ptr<DummyNumericIndex<T>>(
+ new DummyNumericIndex<T>(filesystem, std::move(working_path)));
+ ICING_RETURN_IF_ERROR(dummy_numeric_index->InitializeNewStorage());
+ return dummy_numeric_index;
+ }
+
+ ~DummyNumericIndex() override = default;
+
+ std::unique_ptr<typename NumericIndex<T>::Editor> Edit(
+ std::string_view property_path, DocumentId document_id,
+ SectionId section_id) override {
+ return std::make_unique<Editor>(property_path, document_id, section_id,
+ storage_);
+ }
+
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ std::string_view property_path, T key_lower, T key_upper,
+ const DocumentStore&, const SchemaStore&, int64_t) const override;
+
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ libtextclassifier3::Status Clear() override {
+ storage_.clear();
+ last_added_document_id_ = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+ }
+
+ DocumentId last_added_document_id() const override {
+ return last_added_document_id_;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ if (last_added_document_id_ == kInvalidDocumentId ||
+ document_id > last_added_document_id_) {
+ last_added_document_id_ = document_id;
+ }
+ }
+
+ int num_property_indices() const override { return storage_.size(); }
+
+ private:
+ class Editor : public NumericIndex<T>::Editor {
+ public:
+ explicit Editor(
+ std::string_view property_path, DocumentId document_id,
+ SectionId section_id,
+ std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>&
+ storage)
+ : NumericIndex<T>::Editor(property_path, document_id, section_id),
+ storage_(storage) {}
+
+ ~Editor() override = default;
+
+ libtextclassifier3::Status BufferKey(T key) override {
+ seen_keys_.insert(key);
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
+
+ private:
+ std::unordered_set<T> seen_keys_;
+ std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>&
+ storage_; // Does not own.
+ };
+
+ class Iterator : public NumericIndex<T>::Iterator {
+ public:
+    // We group the BasicHits (sorted by document_id) of a key into a Bucket
+    // (stored as a std::vector) and store key -> vector in a std::map. When
+    // doing a range query, we may access vectors from multiple keys and want
+    // to return BasicHits to callers sorted by document_id. Therefore, this is
+    // really the "merge K sorted vectors" problem.
+    // To implement this algorithm via a priority_queue, we create this wrapper
+    // class to store iterators of the map and the vector.
+ class BucketInfo {
+ public:
+ explicit BucketInfo(
+ typename std::map<T, std::vector<BasicHit>>::const_iterator
+ bucket_iter)
+ : bucket_iter_(bucket_iter),
+ vec_iter_(bucket_iter_->second.rbegin()) {}
+
+ bool Advance() { return ++vec_iter_ != bucket_iter_->second.rend(); }
+
+ const BasicHit& GetCurrentBasicHit() const { return *vec_iter_; }
+
+ bool operator<(const BucketInfo& other) const {
+      // std::priority_queue is a max heap, and we should return BasicHits in
+      // DocumentId descending order.
+      // - BucketInfo::operator< should have the same order as DocumentId.
+      // - BasicHit encodes an inverted document id, and its operator<
+      //   compares the encoded raw value directly.
+      // - Therefore, BucketInfo::operator< should compare BasicHits in
+      //   reverse.
+      // - This makes priority_queue return buckets in DocumentId descending
+      //   and SectionId ascending order.
+      // - The direction we sort SectionId by (or pop from priority_queue)
+      //   doesn't matter, because all hits for the same DocumentId will be
+      //   merged into a single DocHitInfo.
+ return other.GetCurrentBasicHit() < GetCurrentBasicHit();
+ }
+
+ private:
+ typename std::map<T, std::vector<BasicHit>>::const_iterator bucket_iter_;
+ std::vector<BasicHit>::const_reverse_iterator vec_iter_;
+ };
+
+ explicit Iterator(T key_lower, T key_upper,
+ std::vector<BucketInfo>&& bucket_info_vec)
+ : NumericIndex<T>::Iterator(key_lower, key_upper),
+ pq_(std::less<BucketInfo>(), std::move(bucket_info_vec)),
+ num_advance_calls_(0) {}
+
+ ~Iterator() override = default;
+
+ libtextclassifier3::Status Advance() override;
+
+ DocHitInfo GetDocHitInfo() const override { return doc_hit_info_; }
+
+ int32_t GetNumAdvanceCalls() const override { return num_advance_calls_; }
+
+ int32_t GetNumBlocksInspected() const override { return 0; }
+
+ private:
+ std::priority_queue<BucketInfo> pq_;
+ DocHitInfo doc_hit_info_;
+
+ int32_t num_advance_calls_;
+ };
+
+ explicit DummyNumericIndex(const Filesystem& filesystem,
+ std::string&& working_path)
+ : NumericIndex<T>(filesystem, std::move(working_path),
+ PersistentStorage::WorkingPathType::kDummy),
+ dummy_crcs_buffer_(
+ std::make_unique<uint8_t[]>(sizeof(PersistentStorage::Crcs))),
+ last_added_document_id_(kInvalidDocumentId) {
+ memset(dummy_crcs_buffer_.get(), 0, sizeof(PersistentStorage::Crcs));
+ }
+
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override {
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override {
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override {
+ return Crc32(0);
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override {
+ return Crc32(0);
+ }
+
+ PersistentStorage::Crcs& crcs() override {
+ return *reinterpret_cast<PersistentStorage::Crcs*>(
+ dummy_crcs_buffer_.get());
+ }
+ const PersistentStorage::Crcs& crcs() const override {
+ return *reinterpret_cast<const PersistentStorage::Crcs*>(
+ dummy_crcs_buffer_.get());
+ }
+
+ std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>> storage_;
+ std::unique_ptr<uint8_t[]> dummy_crcs_buffer_;
+ DocumentId last_added_document_id_;
+};
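+
+// Illustrative usage sketch (hypothetical caller; the property path, key, and
+// variables below are assumptions, not part of this change). A caller would
+// create the dummy index, buffer keys through an Editor, and then commit them:
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<DummyNumericIndex<int64_t>> index,
+//       DummyNumericIndex<int64_t>::Create(filesystem, "/tmp/dummy-index"));
+//   std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+//       index->Edit("price", /*document_id=*/0, /*section_id=*/0);
+//   ICING_RETURN_IF_ERROR(editor->BufferKey(25));
+//   ICING_RETURN_IF_ERROR(std::move(*editor).IndexAllBufferedKeys());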
+
+template <typename T>
+libtextclassifier3::Status
+DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() && {
+ auto property_map_iter = storage_.find(this->property_path_);
+ if (property_map_iter == storage_.end()) {
+ const auto& [inserted_iter, insert_result] =
+ storage_.insert({this->property_path_, {}});
+ if (!insert_result) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create a new map for property \"",
+ this->property_path_, "\""));
+ }
+ property_map_iter = inserted_iter;
+ }
+
+ for (const T& key : seen_keys_) {
+ auto key_map_iter = property_map_iter->second.find(key);
+ if (key_map_iter == property_map_iter->second.end()) {
+ const auto& [inserted_iter, insert_result] =
+ property_map_iter->second.insert({key, {}});
+ if (!insert_result) {
+ return absl_ports::InternalError("Failed to create a new map for key");
+ }
+ key_map_iter = inserted_iter;
+ }
+ key_map_iter->second.push_back(
+ BasicHit(this->section_id_, this->document_id_));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
+ if (pq_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ DocumentId document_id = pq_.top().GetCurrentBasicHit().document_id();
+ doc_hit_info_ = DocHitInfo(document_id);
+  // Merge all sections with the same document_id into a single DocHitInfo.
+ while (!pq_.empty() &&
+ pq_.top().GetCurrentBasicHit().document_id() == document_id) {
+ ++num_advance_calls_;
+ doc_hit_info_.UpdateSection(pq_.top().GetCurrentBasicHit().section_id());
+
+ BucketInfo info = pq_.top();
+ pq_.pop();
+
+ if (info.Advance()) {
+ pq_.push(std::move(info));
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
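+
+// Illustrative trace (assumed data, not from this change): suppose the map
+// holds {key=1: hits for documents 3 and 5} and {key=2: a hit for document 5}.
+// The first Advance() pops both document 5 hits from the queue and merges
+// their section ids into a single DocHitInfo; the next Advance() returns
+// document 3.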
+
+template <typename T>
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+DummyNumericIndex<T>::GetIterator(std::string_view property_path, T key_lower,
+ T key_upper, const DocumentStore&,
+ const SchemaStore&, int64_t) const {
+ if (key_lower > key_upper) {
+ return absl_ports::InvalidArgumentError(
+ "key_lower should not be greater than key_upper");
+ }
+
+ auto property_map_iter = storage_.find(std::string(property_path));
+ if (property_map_iter == storage_.end()) {
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<T>>(nullptr);
+ }
+
+ std::vector<typename Iterator::BucketInfo> bucket_info_vec;
+ for (auto key_map_iter = property_map_iter->second.lower_bound(key_lower);
+ key_map_iter != property_map_iter->second.cend() &&
+ key_map_iter->first <= key_upper;
+ ++key_map_iter) {
+ bucket_info_vec.push_back(typename Iterator::BucketInfo(key_map_iter));
+ }
+
+ return std::make_unique<DocHitInfoIteratorNumeric<T>>(
+ std::make_unique<Iterator>(key_lower, key_upper,
+ std::move(bucket_info_vec)));
+}
+
+template <typename T>
+libtextclassifier3::Status DummyNumericIndex<T>::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>
+ new_storage;
+
+ for (const auto& [property_path, old_property_map] : storage_) {
+ std::map<T, std::vector<BasicHit>> new_property_map;
+ for (const auto& [key, hits] : old_property_map) {
+ for (const BasicHit& hit : hits) {
+ DocumentId old_doc_id = hit.document_id();
+ if (old_doc_id >= document_id_old_to_new.size() ||
+ document_id_old_to_new[old_doc_id] == kInvalidDocumentId) {
+ continue;
+ }
+
+ new_property_map[key].push_back(
+ BasicHit(hit.section_id(), document_id_old_to_new[old_doc_id]));
+ }
+ }
+
+ if (!new_property_map.empty()) {
+ new_storage[property_path] = std::move(new_property_map);
+ }
+ }
+
+ storage_ = std::move(new_storage);
+ last_added_document_id_ = new_last_added_document_id;
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_DUMMY_NUMERIC_INDEX_H_
diff --git a/icing/index/numeric/integer-index-bucket-util.cc b/icing/index/numeric/integer-index-bucket-util.cc
new file mode 100644
index 0000000..a05baab
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util.cc
@@ -0,0 +1,205 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-bucket-util.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+namespace integer_index_bucket_util {
+
+namespace {
+
+// Helper function to determine if the data slice [start, end) forms a "full
+// single-range bucket".
+//
+// Full single-range bucket: keys of all data are identical and # of them
+// exceeds num_data_threshold.
+//
+// REQUIRES: the data slice [start, end) is sorted by key.
+inline bool WouldBeFullSingleRangeBucket(
+ const std::vector<IntegerIndexData>::iterator& start,
+ const std::vector<IntegerIndexData>::iterator& end,
+ int32_t num_data_threshold) {
+ return std::distance(start, end) > num_data_threshold &&
+ start->key() == (end - 1)->key();
+}
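+
+// For example (illustrative): with num_data_threshold = 3, a slice of four
+// data that all have key = 7 forms a full single-range bucket, while a slice
+// of four data with keys [7, 7, 7, 8] does not.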
+
+// Helper function to determine if a bucket is full single-range.
+//
+// REQUIRES:
+//   keys of all data in [bucket.start, bucket.end) are within
+//   [bucket.key_lower, bucket.key_upper]
+inline bool IsFullSingleRangeBucket(const DataRangeAndBucketInfo& bucket,
+ int32_t num_data_threshold) {
+ return bucket.key_lower == bucket.key_upper &&
+ WouldBeFullSingleRangeBucket(bucket.start, bucket.end,
+ num_data_threshold);
+}
+
+// Helper function to append new bucket(s) with the corresponding data slice
+// for the range [curr_key_lower, last_key], where
+// last_key = (it_end - 1)->key().
+//
+// It also handles an edge case:
+// If the data slice [it_start, it_end) forms a "full single-range bucket" (see
+// WouldBeFullSingleRangeBucket for the definition), then we have to put it
+// into a single-range bucket [last_key, last_key] instead of [curr_key_lower,
+// last_key]. We also have to deal with the range
+// [curr_key_lower, last_key - 1]:
+// - If the previous bucket exists and is not a "full single-range bucket",
+//   then merge [curr_key_lower, last_key - 1] into the previous bucket, i.e.
+//   change the previous bucket's key_upper to (last_key - 1). Then we end up
+//   having:
+//   - [prev_bucket.key_lower, last_key - 1]
+//   - [last_key, last_key]
+// - Otherwise, we have to create [curr_key_lower, last_key - 1] with
+//   empty data. Then we end up having (note: prev_bucket.key_upper ==
+//   curr_key_lower - 1):
+//   - [prev_bucket.key_lower, curr_key_lower - 1]
+//   - [curr_key_lower, last_key - 1]
+//   - [last_key, last_key]
+// This avoids invoking bucket splitting too frequently.
+// For example, original_key_lower = 0, original_key_upper = 50. If we have
+// (num_data_threshold + 1) data with key = 20 and another data with key = 40:
+// - Without this handling, we would split them into [[0, 20], [21, 50]]. Then
+//   when adding data with key = 10 in the next round, we would invoke split
+//   again and split [0, 20] into [[0, 10], [11, 20]].
+// - With this handling, we split them into [[0, 19], [20, 20], [21, 50]],
+//   which avoids splitting in the next round for key = 20.
+//
+// REQUIRES: it_start < it_end
+void AppendNewBuckets(const std::vector<IntegerIndexData>::iterator& it_start,
+ const std::vector<IntegerIndexData>::iterator& it_end,
+ int64_t curr_key_lower, int32_t num_data_threshold,
+ std::vector<DataRangeAndBucketInfo>& results) {
+ int64_t last_key = (it_end - 1)->key();
+ if (curr_key_lower < last_key &&
+ WouldBeFullSingleRangeBucket(it_start, it_end, num_data_threshold)) {
+ if (!results.empty() &&
+ !IsFullSingleRangeBucket(results.back(), num_data_threshold)) {
+ // Previous bucket is not full single-range, so merge it to now hold the
+ // range [prev_bucket.key_lower, last_key - 1].
+ results.back().key_upper = last_key - 1;
+ } else {
+ // There is either no previous bucket or the previous bucket is full
+ // single-range. So add an empty bucket for the range [curr_key_lower,
+ // last_key - 1].
+ results.push_back(DataRangeAndBucketInfo(it_start, it_start,
+ curr_key_lower, last_key - 1));
+ }
+ curr_key_lower = last_key;
+ }
+ results.push_back(
+ DataRangeAndBucketInfo(it_start, it_end, curr_key_lower, last_key));
+}
+
+} // namespace
+
+std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
+ int64_t original_key_lower,
+ int64_t original_key_upper,
+ int32_t num_data_threshold) {
+ // Early return if there is no need to split.
+ if (data.size() <= num_data_threshold) {
+ return {DataRangeAndBucketInfo(data.begin(), data.end(), original_key_lower,
+ original_key_upper)};
+ }
+
+ // Sort data by key.
+ std::sort(
+ data.begin(), data.end(),
+ [](const IntegerIndexData& lhs, const IntegerIndexData& rhs) -> bool {
+ return lhs.key() < rhs.key();
+ });
+
+ std::vector<DataRangeAndBucketInfo> results;
+ int64_t curr_key_lower = original_key_lower;
+ // Sliding window [it_start, it_end) to separate data into different buckets.
+ auto it_start = data.begin();
+ auto it_end = data.begin();
+ while (it_end != data.end()) {
+    // Attempt to extend it_end by 1, but we have to include all data with the
+    // same key, since equal keys cannot be separated into different buckets.
+    // Use extend_it_end to avoid modifying it_end directly: in some edge
+    // cases the extension in a single round is extremely large (i.e. a lot of
+    // data have the same key), and we want to separate them. For example:
+    // - key = 0: 5 data
+    // - key = 1: num_data_threshold - 1 data
+    // In the second round, # of data in the sliding window will exceed the
+    // threshold. We want to separate all data with key = 0 into a single
+    // bucket instead of putting key = 0 and key = 1 together. Using
+    // extend_it_end allows us to preserve it_end of the previous round and
+    // handle this case.
+ auto extend_it_end = it_end + 1;
+ while (extend_it_end != data.end() &&
+ it_end->key() == extend_it_end->key()) {
+ ++extend_it_end;
+ }
+
+ if (std::distance(it_start, extend_it_end) > num_data_threshold &&
+ it_start != it_end) {
+ // Split data between [it_start, it_end) into range [curr_key_lower,
+ // (it_end - 1)->key()].
+ AppendNewBuckets(it_start, it_end, curr_key_lower, num_data_threshold,
+ results);
+
+ // it_end at this moment won't be data.end(), so the last element of the
+ // new bucket can't have key == INT64_MAX. Therefore, it is safe to set
+ // curr_key_lower as ((it_end - 1)->key() + 1).
+ curr_key_lower = (it_end - 1)->key() + 1;
+ it_start = it_end;
+ }
+ it_end = extend_it_end;
+ }
+
+ // Handle the final range [curr_key_lower, original_key_upper].
+ if (curr_key_lower <= original_key_upper) {
+ if (it_start != it_end) {
+ AppendNewBuckets(it_start, it_end, curr_key_lower, num_data_threshold,
+ results);
+
+ // AppendNewBuckets only handles range [curr_key_lower, (it_end -
+ // 1)->key()], so we have to handle range [(it_end - 1)->key() + 1,
+ // original_key_upper] if needed.
+ int64_t last_key = (it_end - 1)->key();
+ if (last_key != std::numeric_limits<int64_t>::max() &&
+ last_key + 1 <= original_key_upper) {
+ if (!results.empty() &&
+ !IsFullSingleRangeBucket(results.back(), num_data_threshold)) {
+ results.back().key_upper = original_key_upper;
+ } else {
+ results.push_back(DataRangeAndBucketInfo(
+ it_start, it_start, last_key + 1, original_key_upper));
+ }
+ }
+ } else {
+ results.push_back(DataRangeAndBucketInfo(it_start, it_end, curr_key_lower,
+ original_key_upper));
+ }
+ }
+
+ return results;
+}
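+
+// Worked example (illustrative, assumed inputs): for keys [0, 0, 1, 1, 2, 2]
+// with original range [0, 10] and num_data_threshold = 3, each window
+// extension past a key pair would exceed the threshold, so Split produces
+// buckets [0, 0], [1, 1], and [2, 10] with data slices [0, 0], [1, 1], and
+// [2, 2] respectively (the final bucket's key_upper is extended to
+// original_key_upper).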
+
+} // namespace integer_index_bucket_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-bucket-util.h b/icing/index/numeric/integer-index-bucket-util.h
new file mode 100644
index 0000000..d6fc245
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util.h
@@ -0,0 +1,81 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+namespace integer_index_bucket_util {
+
+// A wrapper struct that contains information of a bucket.
+// - The bucket contains data within the iterator [start, end).
+// - Bucket range is [key_lower, key_upper], and all data within [start, end)
+// should have keys in the bucket range.
+//
+// Note: the caller should make sure the data vector outlives instances of
+// this wrapper struct.
+struct DataRangeAndBucketInfo {
+ std::vector<IntegerIndexData>::iterator start;
+ std::vector<IntegerIndexData>::iterator end;
+ int64_t key_lower;
+ int64_t key_upper;
+
+ explicit DataRangeAndBucketInfo(
+ std::vector<IntegerIndexData>::iterator start_in,
+ std::vector<IntegerIndexData>::iterator end_in, int64_t key_lower_in,
+ int64_t key_upper_in)
+ : start(std::move(start_in)),
+ end(std::move(end_in)),
+ key_lower(key_lower_in),
+ key_upper(key_upper_in) {}
+};
+
+// Helper function to split data (that were originally in a bucket with range
+// [original_key_lower, original_key_upper]) into different buckets according
+// to num_data_threshold.
+// - The input vector `data` will be sorted by key in ascending order (unless
+//   there's no need to split, in which case data is returned unmodified).
+// - Data with the same key will be in the same bucket even if # of them
+//   exceeds num_data_threshold.
+// - Ranges of all buckets will be disjoint, and their union will be
+//   [original_key_lower, original_key_upper].
+// - Data slices (i.e. [start, end)) can be empty.
+//
+// REQUIRES:
+// - original_key_lower < original_key_upper
+// - num_data_threshold > 0
+// - Keys of all data are in range [original_key_lower, original_key_upper]
+//
+// Returns: a vector of DataRangeAndBucketInfo containing all bucket info
+// after splitting. The returned vector contains at least one bucket; an
+// empty result is considered an error.
+std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
+ int64_t original_key_lower,
+ int64_t original_key_upper,
+ int32_t num_data_threshold);
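+
+// Illustrative usage sketch (hypothetical caller; the threshold value below
+// is an assumption, not a constant defined by this change):
+//
+//   std::vector<IntegerIndexData> data = ...;  // data of an overflowing bucket
+//   std::vector<DataRangeAndBucketInfo> buckets =
+//       Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+//             /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+//             /*num_data_threshold=*/341);
+//   for (const DataRangeAndBucketInfo& bucket : buckets) {
+//     // Persist [bucket.start, bucket.end) into new storage for the key
+//     // range [bucket.key_lower, bucket.key_upper].
+//   }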
+
+} // namespace integer_index_bucket_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
diff --git a/icing/index/numeric/integer-index-bucket-util_test.cc b/icing/index/numeric/integer-index-bucket-util_test.cc
new file mode 100644
index 0000000..82c593e
--- /dev/null
+++ b/icing/index/numeric/integer-index-bucket-util_test.cc
@@ -0,0 +1,1112 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-bucket-util.h"
+
+#include <limits>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+namespace integer_index_bucket_util {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+static constexpr DocumentId kDefaultDocumentId = 123;
+static constexpr SectionId kDefaultSectionId = 31;
+
+TEST(IntegerIndexBucketUtilTest, Split_numDataNotDivisibleByThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -10;
+ int64_t key_upper = 10;
+ int32_t num_data_threshold = 3;
+ ASSERT_THAT(data.size() % num_data_threshold, Ne(0));
+
+ // Keys = [-10, -3, -2, 0, 1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -3, -2].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 1: key lower = -1, key upper = 2, keys = [0, 1, 2].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = 10, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_numDataDivisibleByThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -10;
+ int64_t key_upper = 10;
+ int32_t num_data_threshold = 3;
+ ASSERT_THAT(data.size() % num_data_threshold, Eq(0));
+
+ // Keys = [-10, -3, -2, 0, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -3, -2].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+  // Bucket 1: key lower = -1, key upper = 10, keys = [0, 2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_shouldIncludeOriginalKeyRange) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -1000;
+ int64_t key_upper = 1000;
+ int32_t num_data_threshold = 3;
+
+ // Keys = [-10, -3, -2, 0, 1, 2, 10].
+ // Split should include the original key_lower and key_upper even if there is
+ // no key at boundary.
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -1000, key upper = -2, keys = [-10, -3, -2].
+ EXPECT_THAT(results[0].key_lower, Eq(-1000));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 1: key lower = -1, key upper = 2, keys = [0, 1, 2].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = 1000, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(1000));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_singleBucketWithoutSplitting) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)};
+ int64_t key_lower = -1000;
+ int64_t key_upper = 1000;
+ int32_t num_data_threshold = 100;
+
+ // Keys = [-10, -3, -2, 0, 1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, key_lower, key_upper, num_data_threshold);
+ ASSERT_THAT(results, SizeIs(1));
+ // Bucket 0: key lower = -1000, key upper = 1000, keys = [-10, -3, -2, 0, 1,
+  // 2, 10]. Since # of data <= threshold, the data vector won't be sorted, and
+  // thus [start, end) contains the data in their original order.
+ EXPECT_THAT(results[0].key_lower, Eq(-1000));
+ EXPECT_THAT(results[0].key_upper, Eq(1000));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+}
+
+TEST(IntegerIndexBucketUtilTest, Split_emptyData) {
+ std::vector<IntegerIndexData> empty_data;
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(empty_data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(1));
+ // Bucket 0: key lower = -10, key upper = 10, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_firstBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -10, -10, -10, -10, 0, 3, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = 0, 3, ....
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10, -10, -10,
+ // -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = 5, keys = [0, 3, 5].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 2: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(6));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_firstBucket_keyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-7, -7, -7, -7, -7, 0, 3, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -7 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = 0, 3, ....
+ // - They should be in a single range bucket [-7, -7], and another bucket
+ // [-10, -8] with empty data should be created before it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -8, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-8));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = -7, key upper = -7, keys = [-7, -7, -7, -7, -7].
+ EXPECT_THAT(results[1].key_lower, Eq(-7));
+ EXPECT_THAT(results[1].key_upper, Eq(-7));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -7)));
+ // Bucket 2: key lower = -6, key upper = 5, keys = [0, 3, 5].
+ EXPECT_THAT(results[2].key_lower, Eq(-6));
+ EXPECT_THAT(results[2].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 3: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[3].key_lower, Eq(6));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_midBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -5, -4, -4, -4, -4, -4, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -4 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -5, 5, 10.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -5, keys = [-10, -5].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5)));
+ // Bucket 1: key lower = -4, key upper = -4, keys = [-4, -4, -4, -4, -4].
+ EXPECT_THAT(results[1].key_lower, Eq(-4));
+ EXPECT_THAT(results[1].key_upper, Eq(-4));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -4)));
+ // Bucket 2: key lower = -3, key upper = 10, keys = [5, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(-3));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_midBucket_keyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -5, -1, -1, -1, -1, -1, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -1 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -5, 5, 10.
+ // - They should be in a single range bucket [-1, -1], and range [-4, -2]
+ // should be merged into the previous bucket.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -2, keys = [-10, -5].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -5)));
+ // Bucket 1: key lower = -1, key upper = -1, keys = [-1, -1, -1, -1, -1].
+ EXPECT_THAT(results[1].key_lower, Eq(-1));
+ EXPECT_THAT(results[1].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 2: key lower = 0, key upper = 10, keys = [5, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(0));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3)};
+
+ // Keys = [-10, -3, 0, 2, 3, 3, 3, 3, 3].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 3 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [3, 3], and another bucket
+ // [4, 10] with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 2, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+  // Bucket 2: key lower = 3, key upper = 3, keys = [3, 3, 3, 3, 3].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(3));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 3)));
+ // Bucket 3: key lower = 4, key upper = 10, keys = [].
+ EXPECT_THAT(results[3].key_lower, Eq(4));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyWithinKeyLowerAndUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6)};
+
+ // Keys = [-10, -3, 0, 2, 6, 6, 6, 6, 6].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 6 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+  // - They should be in a single range bucket [6, 6]. Range [3, 5] should be
+  //   merged into the previous bucket, and another bucket [7, 10] with empty
+  //   data should be created after it.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 5, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 6, key upper = 6, keys = [6, 6, 6, 6, 6].
+ EXPECT_THAT(results[2].key_lower, Eq(6));
+ EXPECT_THAT(results[2].key_upper, Eq(6));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 6)));
+ // Bucket 3: key lower = 7, key upper = 10, keys = [].
+ EXPECT_THAT(results[3].key_lower, Eq(7));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_lastBucket_keyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -3, 0, 2, 10, 10, 10, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be separated from key = -10, -3, 0, 2.
+ // - They should be in a single range bucket [10, 10], and range [3, 9] should
+ // be merged into the previous bucket.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = 0, keys = [-10, -3, 0].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 1: key lower = 1, key upper = 9, keys = [2].
+ EXPECT_THAT(results[1].key_lower, Eq(1));
+ EXPECT_THAT(results[1].key_upper, Eq(9));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 10, key upper = 10, keys = [10, 10, 10, 10, 10].
+ EXPECT_THAT(results[2].key_lower, Eq(10));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_shouldNotMergeIntoPreviousBucket) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -2, -2, -2, -2, -2, 5, 5, 5, 5, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+  // - Data with key = -2 and key = 5 should each be put into their own single
+  //   range bucket.
+  // - When dealing with key = 5, range [-1, 4] should not be merged into the
+  //   previous bucket [-2, -2], because [-2, -2] also contains single-key data
+  //   exceeding the threshold. Instead, we should create bucket [-1, 4] with
+  //   empty data.
+ ASSERT_THAT(results, SizeIs(5));
+ // Bucket 0: key lower = -10, key upper = -3, keys = [-10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-3));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId,
+ kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -2, key upper = -2, keys = [-2, -2, -2, -2, -2].
+ EXPECT_THAT(results[1].key_lower, Eq(-2));
+ EXPECT_THAT(results[1].key_upper, Eq(-2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 2: key lower = -1, key upper = 4, keys = [].
+ EXPECT_THAT(results[2].key_lower, Eq(-1));
+ EXPECT_THAT(results[2].key_upper, Eq(4));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ IsEmpty());
+ // Bucket 3: key lower = 5, key upper = 5, keys = [5, 5, 5, 5, 5].
+ EXPECT_THAT(results[3].key_lower, Eq(5));
+ EXPECT_THAT(results[3].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 4: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[4].key_lower, Eq(6));
+ EXPECT_THAT(results[4].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[4].start, results[4].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_shouldMergeIntoPreviousBucket) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -8),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -8, -3, -2, -2, -2, 5, 5, 5, 5, 5, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Data with key = 5 should be put into a single bucket.
+ // - When dealing with key = 5, range [-1, 4] should be merged into the
+ // previous bucket [-2, -2] because # of data in [-2, -2] doesn't exceed the
+ // threshold.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -3, keys = [-10, -8, -3].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-3));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -8),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -3)));
+ // Bucket 1: key lower = -2, key upper = 4, keys = [-2, -2, -2].
+ EXPECT_THAT(results[1].key_lower, Eq(-2));
+ EXPECT_THAT(results[1].key_upper, Eq(4));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -2)));
+ // Bucket 2: key lower = 5, key upper = 5, keys = [5, 5, 5, 5, 5].
+ EXPECT_THAT(results[2].key_lower, Eq(5));
+ EXPECT_THAT(results[2].key_upper, Eq(5));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 5)));
+ // Bucket 3: key lower = 6, key upper = 10, keys = [10].
+ EXPECT_THAT(results[3].key_lower, Eq(6));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)};
+
+ // Keys = [-10, -10, -10, -10, -10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = -10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [-10, -10], and another bucket
+ // [-9, 10] with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10, -10, -10,
+ // -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = 10, keys = [].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyWithinKeyLowerAndUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)};
+
+ // Keys = [0, 0, 0, 0, 0].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 0 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [0, 0]. Another bucket [-10, -1]
+ // with empty data should be created before it, and another bucket [1, 10]
+ // with empty data should be created after it.
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = -10, key upper = -1, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+ // Bucket 1: key lower = 0, key upper = 0, keys = [0, 0, 0, 0, 0].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(0));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 0)));
+ // Bucket 2: key lower = 1, key upper = 10, keys = [].
+ EXPECT_THAT(results[2].key_lower, Eq(1));
+ EXPECT_THAT(results[2].key_upper, Eq(10));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ IsEmpty());
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_sameKeysExceedingThreshold_singleBucket_keyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [10, 10, 10, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+ // - Even though # of data with key = 10 exceeds the threshold, they should
+ // still be in the same bucket.
+ // - They should be in a single range bucket [10, 10], and another bucket
+ // [-10, 9] with empty data should be created before it.
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = -10, key upper = 9, keys = [].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(9));
+ EXPECT_THAT(std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ IsEmpty());
+  // Bucket 1: key lower = 10, key upper = 10, keys = [10, 10, 10, 10, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(10));
+ EXPECT_THAT(results[1].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_adjacentKeysTotalNumDataExceedThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [-10, -10, -1, -1, 2, 2, 10, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/-10, /*original_key_upper=*/10,
+ /*num_data_threshold=*/3);
+  // Even though the # of data with the same key is within the threshold, the
+  // total # of data of adjacent keys exceeds the threshold, so they should be
+  // separated into different buckets.
+ ASSERT_THAT(results, SizeIs(4));
+ // Bucket 0: key lower = -10, key upper = -10, keys = [-10, -10].
+ EXPECT_THAT(results[0].key_lower, Eq(-10));
+ EXPECT_THAT(results[0].key_upper, Eq(-10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10)));
+ // Bucket 1: key lower = -9, key upper = -1, keys = [-1, -1].
+ EXPECT_THAT(results[1].key_lower, Eq(-9));
+ EXPECT_THAT(results[1].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 2: key lower = 0, key upper = 2, keys = [2, 2].
+ EXPECT_THAT(results[2].key_lower, Eq(0));
+ EXPECT_THAT(results[2].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 3: key lower = 3, key upper = 10, keys = [10, 10].
+ EXPECT_THAT(results[3].key_lower, Eq(3));
+ EXPECT_THAT(results[3].key_upper, Eq(10));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[3].start, results[3].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_smallestKeyGreaterThanKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min() + 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN + 1, -10, -1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = -1, keys = [INT64_MIN + 1,
+ // -10, -1].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min() + 1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 1: key lower = 0, key upper = INT64_MAX, keys = [2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_smallestKeyEqualsKeyLower) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN, -10, -1, 2, 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = -1, keys = [INT64_MIN, -10,
+ // -1].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(-1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1)));
+ // Bucket 1: key lower = 0, key upper = INT64_MAX, keys = [2, 10].
+ EXPECT_THAT(results[1].key_lower, Eq(0));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyLowerEqualsIntMin_keyIntMinExceedingThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)};
+
+ // Keys = [INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN, -10, -1, 2,
+ // 10].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = INT64_MIN, key upper = INT64_MIN, keys = [INT64_MIN,
+ // INT64_MIN, INT64_MIN, INT64_MIN, INT64_MIN].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::min())));
+ // Bucket 1: key lower = INT64_MIN + 1, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[1].key_lower,
+ Eq(std::numeric_limits<int64_t>::min() + 1));
+ EXPECT_THAT(results[1].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 2: key lower = 3, key upper = INT64_MAX, keys = [10].
+ EXPECT_THAT(results[2].key_lower, Eq(3));
+ EXPECT_THAT(results[2].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_largestKeySmallerThanKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max() - 1),
+ };
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX - 1].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX, keys = [10, INT64_MAX - 1].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max() - 1)));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_largestKeyEqualsKeyUpper) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ };
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(2));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+ // Bucket 1: key lower = 3, key upper = INT64_MAX, keys = [10, INT64_MAX].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())));
+}
+
+TEST(IntegerIndexBucketUtilTest,
+ Split_keyUpperEqualsIntMax_keyIntMaxExceedingThreshold) {
+ std::vector<IntegerIndexData> data = {
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())};
+
+ // Keys = [-10, -1, 2, 10, INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX,
+ // INT64_MAX].
+ std::vector<DataRangeAndBucketInfo> results =
+ Split(data, /*original_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*original_key_upper=*/std::numeric_limits<int64_t>::max(),
+ /*num_data_threshold=*/3);
+ ASSERT_THAT(results, SizeIs(3));
+ // Bucket 0: key lower = INT64_MIN, key upper = 2, keys = [-10, -1, 2].
+ EXPECT_THAT(results[0].key_lower, Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(results[0].key_upper, Eq(2));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[0].start, results[0].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -10),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, -1),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 2)));
+  // Bucket 1: key lower = 3, key upper = INT64_MAX - 1, keys = [10].
+ EXPECT_THAT(results[1].key_lower, Eq(3));
+ EXPECT_THAT(results[1].key_upper,
+ Eq(std::numeric_limits<int64_t>::max() - 1));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[1].start, results[1].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId, 10)));
+ // Bucket 2: key lower = INT64_MAX, key upper = INT64_MAX, keys = [INT64_MAX,
+ // INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX].
+ EXPECT_THAT(results[2].key_lower, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(results[2].key_upper, Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(
+ std::vector<IntegerIndexData>(results[2].start, results[2].end),
+ ElementsAre(IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max()),
+ IntegerIndexData(kDefaultSectionId, kDefaultDocumentId,
+ std::numeric_limits<int64_t>::max())));
+}
+
+} // namespace
+
+} // namespace integer_index_bucket_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-data.h b/icing/index/numeric/integer-index-data.h
new file mode 100644
index 0000000..92653fa
--- /dev/null
+++ b/icing/index/numeric/integer-index-data.h
@@ -0,0 +1,59 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_DATA_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_DATA_H_
+
+#include <cstdint>
+
+#include "icing/index/hit/hit.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Data wrapper to store BasicHit and key for integer index.
+class IntegerIndexData {
+ public:
+ explicit IntegerIndexData(SectionId section_id, DocumentId document_id,
+ int64_t key)
+ : basic_hit_(section_id, document_id), key_(key) {}
+
+ explicit IntegerIndexData() : basic_hit_(), key_(0) {}
+
+ const BasicHit& basic_hit() const { return basic_hit_; }
+
+ int64_t key() const { return key_; }
+
+ bool is_valid() const { return basic_hit_.is_valid(); }
+
+ bool operator<(const IntegerIndexData& other) const {
+ return basic_hit_ < other.basic_hit_;
+ }
+
+ bool operator==(const IntegerIndexData& other) const {
+ return basic_hit_ == other.basic_hit_ && key_ == other.key_;
+ }
+
+ private:
+ BasicHit basic_hit_;
+ int64_t key_;
+} __attribute__((packed));
+static_assert(sizeof(IntegerIndexData) == 12, "");
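+// Note: BasicHit stores one 4-byte encoded value, so with
+// __attribute__((packed)) the struct occupies 4 + 8 = 12 bytes rather than the
+// 16 bytes that natural int64_t alignment would require; the static_assert
+// above guards this layout.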
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_DATA_H_
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
new file mode 100644
index 0000000..72e0266
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -0,0 +1,1180 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-storage.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <queue>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/integer-index-bucket-util.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to flush data between [it_start, it_end) into posting list(s)
+// and return posting list id.
+// Note: it will sort data between [it_start, it_end) by basic hit value, so the
+// caller should be aware that the data order will be changed after calling this
+// function.
+libtextclassifier3::StatusOr<PostingListIdentifier> FlushDataIntoPostingLists(
+ FlashIndexStorage* flash_index_storage,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
+ const std::vector<IntegerIndexData>::iterator& it_start,
+ const std::vector<IntegerIndexData>::iterator& it_end) {
+ if (it_start == it_end) {
+ return PostingListIdentifier::kInvalid;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> new_pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage,
+ posting_list_serializer));
+
+ std::sort(it_start, it_end);
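+  // The accessor only supports prepending, so iterate in reverse: prepending
+  // from the largest element down leaves the posting list contents in the same
+  // ascending order produced by std::sort above.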
+  for (auto it = it_end; it != it_start; --it) {
+    ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(*(it - 1)));
+  }
+
+ PostingListAccessor::FinalizeResult result =
+ std::move(*new_pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
+ return result.id;
+}
+
+// The following 4 helper functions return the correct file path of
+// metadata/sorted_buckets/unsorted_buckets/flash_index_storage, according to
+// the given working directory.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".m");
+}
+
+std::string GetSortedBucketsFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".s");
+}
+
+std::string GetUnsortedBucketsFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".u");
+}
+
+std::string GetFlashIndexStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
+ ".f");
+}
+
+} // namespace
+
+// We add (BasicHit, key) pairs into a bucket in DocumentId descending and
+// SectionId ascending order. When doing a range query, we may access several
+// buckets and want to return BasicHits to callers sorted by DocumentId.
+// Therefore, this problem is actually "merge K sorted lists".
+// To implement this algorithm via priority_queue, we create this wrapper class
+// to store PostingListIntegerIndexAccessor for iterating through the posting
+// list chain.
+// - Non-relevant data (i.e. keys not in range [key_lower, key_upper]) will be
+//   skipped.
+// - Relevant BasicHits will be returned.
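+// For example, if two buckets both contain hits for DocumentIds {3, 7}, the
+// priority queue pops both DocumentId 7 iterators before either DocumentId 3
+// hit, so callers see DocumentIds in descending order across buckets.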
+class BucketPostingListIterator {
+ public:
+ class Comparator {
+ public:
+ // REQUIRES: 2 BucketPostingListIterator* instances (lhs, rhs) should be
+ // valid, i.e. the preceding AdvanceAndFilter() succeeded.
+ bool operator()(const BucketPostingListIterator* lhs,
+ const BucketPostingListIterator* rhs) const {
+ // std::priority_queue is a max heap and we should return BasicHits in
+ // DocumentId descending order.
+ // - BucketPostingListIterator::operator< should have the same order as
+ // DocumentId.
+ // - BasicHit encodes inverted document id and BasicHit::operator<
+ // compares the encoded raw value directly.
+ // - Therefore, BucketPostingListIterator::operator< should compare
+ // BasicHit reversely.
+ // - This will make priority_queue return buckets in DocumentId
+ // descending and SectionId ascending order.
+ // - Whatever direction we sort SectionId by (or pop by priority_queue)
+ // doesn't matter because all hits for the same DocumentId will be
+ // merged into a single DocHitInfo.
+ return rhs->GetCurrentBasicHit() < lhs->GetCurrentBasicHit();
+ }
+ };
+
+ explicit BucketPostingListIterator(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor)
+ : pl_accessor_(std::move(pl_accessor)),
+ should_retrieve_next_batch_(true) {}
+
+ struct AdvanceAndFilterResult {
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ int32_t num_advance_calls = 0;
+ int32_t num_blocks_inspected = 0;
+ };
+ // Advances to the next relevant data. The posting list of a bucket contains
+ // keys within range [bucket.key_lower, bucket.key_upper], but some of them
+ // may be out of [query_key_lower, query_key_upper], so when advancing we have
+ // to filter out those non-relevant keys.
+ //
+ // Returns:
+ // AdvanceAndFilterResult. status will be:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
+ // - Any other PostingListIntegerIndexAccessor errors
+ AdvanceAndFilterResult AdvanceAndFilter(int64_t query_key_lower,
+ int64_t query_key_upper) {
+ AdvanceAndFilterResult result;
+    // Advance curr_ until reaching relevant data (i.e. a key in range
+    // [query_key_lower, query_key_upper]).
+ do {
+ if (!should_retrieve_next_batch_) {
+ ++curr_;
+ should_retrieve_next_batch_ =
+ curr_ >= cached_batch_integer_index_data_.cend();
+ }
+ if (should_retrieve_next_batch_) {
+ auto status = GetNextDataBatch();
+ if (!status.ok()) {
+ result.status = std::move(status);
+ return result;
+ }
+ ++result.num_blocks_inspected;
+ should_retrieve_next_batch_ = false;
+ }
+ ++result.num_advance_calls;
+ } while (curr_->key() < query_key_lower || curr_->key() > query_key_upper);
+
+ return result;
+ }
+
+ const BasicHit& GetCurrentBasicHit() const { return curr_->basic_hit(); }
+
+ private:
+  // Gets the next batch of data from the posting list chain, caches it in
+  // cached_batch_integer_index_data_, and sets curr_ to the beginning of the
+  // cache.
+ libtextclassifier3::Status GetNextDataBatch() {
+ auto cached_batch_integer_index_data_or = pl_accessor_->GetNextDataBatch();
+ if (!cached_batch_integer_index_data_or.ok()) {
+ ICING_LOG(WARNING)
+ << "Fail to get next batch data from posting list due to: "
+ << cached_batch_integer_index_data_or.status().error_message();
+ return std::move(cached_batch_integer_index_data_or).status();
+ }
+
+ cached_batch_integer_index_data_ =
+ std::move(cached_batch_integer_index_data_or).ValueOrDie();
+ curr_ = cached_batch_integer_index_data_.cbegin();
+
+ if (cached_batch_integer_index_data_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor_;
+ std::vector<IntegerIndexData> cached_batch_integer_index_data_;
+ std::vector<IntegerIndexData>::const_iterator curr_;
+ bool should_retrieve_next_batch_;
+};
+
+// Wrapper class to iterate through IntegerIndexStorage to get relevant data.
+// It uses multiple BucketPostingListIterator instances from different candidate
+// buckets and merges all relevant BasicHits from these buckets by
+// std::priority_queue in DocumentId descending order. Also different SectionIds
+// of the same DocumentId will be merged into SectionIdMask and returned as a
+// single DocHitInfo.
+class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
+ public:
+ explicit IntegerIndexStorageIterator(
+ int64_t query_key_lower, int64_t query_key_upper,
+ std::vector<std::unique_ptr<BucketPostingListIterator>>&& bucket_pl_iters)
+ : NumericIndex<int64_t>::Iterator(query_key_lower, query_key_upper),
+ num_advance_calls_(0),
+ num_blocks_inspected_(0) {
+ std::vector<BucketPostingListIterator*> bucket_pl_iters_raw_ptrs;
+ for (std::unique_ptr<BucketPostingListIterator>& bucket_pl_itr :
+ bucket_pl_iters) {
+ // Before adding BucketPostingListIterator* into the priority queue, we
+ // have to advance the bucket iterator to the first valid data since the
+ // priority queue needs valid data to compare the order.
+      // Note: it is possible that the bucket iterator fails to advance on the
+      // first round because all of its data is filtered out by
+      // [query_key_lower, query_key_upper]. In this case, just discard the
+      // iterator.
+ BucketPostingListIterator::AdvanceAndFilterResult
+ advance_and_filter_result =
+ bucket_pl_itr->AdvanceAndFilter(query_key_lower, query_key_upper);
+ if (advance_and_filter_result.status.ok()) {
+ bucket_pl_iters_raw_ptrs.push_back(bucket_pl_itr.get());
+ bucket_pl_iters_.push_back(std::move(bucket_pl_itr));
+ }
+ num_advance_calls_ += advance_and_filter_result.num_advance_calls;
+ num_blocks_inspected_ += advance_and_filter_result.num_blocks_inspected;
+ }
+
+ pq_ = std::priority_queue<BucketPostingListIterator*,
+ std::vector<BucketPostingListIterator*>,
+ BucketPostingListIterator::Comparator>(
+ comparator_, std::move(bucket_pl_iters_raw_ptrs));
+ }
+
+ ~IntegerIndexStorageIterator() override = default;
+
+ // Advances to the next DocHitInfo. Note: several BucketPostingListIterator
+ // instances may be advanced if they point to data with the same DocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
+ // - Any BucketPostingListIterator errors
+ libtextclassifier3::Status Advance() override;
+
+ DocHitInfo GetDocHitInfo() const override { return doc_hit_info_; }
+
+ int32_t GetNumAdvanceCalls() const override { return num_advance_calls_; }
+
+ int32_t GetNumBlocksInspected() const override {
+ return num_blocks_inspected_;
+ }
+
+ private:
+ BucketPostingListIterator::Comparator comparator_;
+
+  // We have to fetch and pop the top BucketPostingListIterator from
+  // std::priority_queue to perform the "merge K sorted lists" algorithm.
+  // - Since std::priority_queue::pop() doesn't return the top element, we have
+  //   to call top() and pop() together.
+  // - std::moving the top() element via const_cast is not appropriate because
+  //   it introduces a transient unstable state for std::priority_queue.
+  // - We don't want to copy BucketPostingListIterator, either.
+  // - Therefore, bucket_pl_iters_ owns all BucketPostingListIterator instances
+  //   and std::priority_queue holds raw pointers, so when calling top() we can
+  //   simply copy the raw pointer and avoid any transient unstable state.
+ std::vector<std::unique_ptr<BucketPostingListIterator>> bucket_pl_iters_;
+ std::priority_queue<BucketPostingListIterator*,
+ std::vector<BucketPostingListIterator*>,
+ BucketPostingListIterator::Comparator>
+ pq_;
+
+ DocHitInfo doc_hit_info_;
+
+ int32_t num_advance_calls_;
+ int32_t num_blocks_inspected_;
+};
+
+libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
+ if (pq_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ DocumentId document_id = pq_.top()->GetCurrentBasicHit().document_id();
+ doc_hit_info_ = DocHitInfo(document_id);
+ // Merge sections with same document_id into a single DocHitInfo
+ while (!pq_.empty() &&
+ pq_.top()->GetCurrentBasicHit().document_id() == document_id) {
+ BucketPostingListIterator* bucket_itr = pq_.top();
+ pq_.pop();
+
+ libtextclassifier3::Status advance_status;
+ do {
+ doc_hit_info_.UpdateSection(
+ bucket_itr->GetCurrentBasicHit().section_id());
+ BucketPostingListIterator::AdvanceAndFilterResult
+ advance_and_filter_result =
+ bucket_itr->AdvanceAndFilter(key_lower_, key_upper_);
+ advance_status = std::move(advance_and_filter_result.status);
+ num_advance_calls_ += advance_and_filter_result.num_advance_calls;
+ num_blocks_inspected_ += advance_and_filter_result.num_blocks_inspected;
+ } while (advance_status.ok() &&
+ bucket_itr->GetCurrentBasicHit().document_id() == document_id);
+ if (advance_status.ok()) {
+ pq_.push(bucket_itr);
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+bool IntegerIndexStorage::Options::IsValid() const {
+ if (num_data_threshold_for_bucket_split <=
+ kMinNumDataThresholdForBucketSplit) {
+ return false;
+ }
+
+ if (!HasCustomInitBuckets()) {
+ return true;
+ }
+
+  // Verify that the bucket ranges are disjoint and their union is
+  // [INT64_MIN, INT64_MAX].
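+  // For example, sorted buckets {[INT64_MIN, -1]} with unsorted buckets
+  // {[0, 100], [101, INT64_MAX]} are valid, while leaving a hole (e.g.
+  // dropping [101, INT64_MAX]) or overlapping ranges (e.g. [-1, 100]) would
+  // fail the checks below.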
+ std::vector<Bucket> buckets;
+ buckets.reserve(custom_init_sorted_buckets.size() +
+ custom_init_unsorted_buckets.size());
+ buckets.insert(buckets.end(), custom_init_sorted_buckets.begin(),
+ custom_init_sorted_buckets.end());
+ buckets.insert(buckets.end(), custom_init_unsorted_buckets.begin(),
+ custom_init_unsorted_buckets.end());
+ if (buckets.empty()) {
+ return false;
+ }
+ std::sort(buckets.begin(), buckets.end());
+ int64_t prev_upper = std::numeric_limits<int64_t>::min();
+ for (int i = 0; i < buckets.size(); ++i) {
+    // key_lower should not be greater than key_upper, and an init bucket
+    // should have an invalid posting list identifier.
+ if (buckets[i].key_lower() > buckets[i].key_upper() ||
+ buckets[i].posting_list_identifier().is_valid()) {
+ return false;
+ }
+
+ // Previous upper bound should not be INT64_MAX since it is not the last
+ // bucket.
+ if (prev_upper == std::numeric_limits<int64_t>::max()) {
+ return false;
+ }
+
+ int64_t expected_lower =
+ (i == 0 ? std::numeric_limits<int64_t>::min() : prev_upper + 1);
+ if (buckets[i].key_lower() != expected_lower) {
+ return false;
+ }
+
+ prev_upper = buckets[i].key_upper();
+ }
+
+ return prev_upper == std::numeric_limits<int64_t>::max();
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::Create(
+ const Filesystem& filesystem, std::string working_path, Options options,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ if (!options.IsValid()) {
+ return absl_ports::InvalidArgumentError(
+ "Invalid IntegerIndexStorage options");
+ }
+
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetSortedBucketsFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetUnsortedBucketsFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetFlashIndexStorageFilePath(working_path).c_str())) {
+    // Discard working_path if any of these files is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ std::move(options), posting_list_serializer);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ std::move(options), posting_list_serializer);
+}
+
+IntegerIndexStorage::~IntegerIndexStorage() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist hash map to disk while destructing "
+ << working_path_;
+ }
+}
+
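+// Comparator for std::lower_bound over a bucket array: finds the first bucket
+// whose key_upper is not smaller than (>=) the given key.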
+class IntegerIndexStorageComparator {
+ public:
+ bool operator()(const IntegerIndexStorage::Bucket& lhs, int64_t rhs) const {
+ return lhs.key_upper() < rhs;
+ }
+} kComparator;
+
+libtextclassifier3::Status IntegerIndexStorage::AddKeys(
+ DocumentId document_id, SectionId section_id,
+ std::vector<int64_t>&& new_keys) {
+ if (new_keys.empty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ SetDirty();
+
+ std::sort(new_keys.begin(), new_keys.end());
+
+ // Dedupe
+ auto last = std::unique(new_keys.begin(), new_keys.end());
+ new_keys.erase(last, new_keys.end());
+
+ if (static_cast<int32_t>(new_keys.size()) >
+ std::numeric_limits<int32_t>::max() - info().num_data) {
+ return absl_ports::ResourceExhaustedError(
+ "# of keys in this integer index storage exceed the limit");
+ }
+
+ // When adding keys into a bucket, we potentially split it into 2 new buckets
+ // and one of them will be added into the unsorted bucket array.
+ // When handling keys belonging to buckets in the unsorted bucket array, we
+ // don't have to (and must not) handle these newly split buckets. Therefore,
+ // collect all newly split buckets in another vector and append them into the
+ // unsorted bucket array after adding all keys.
+ std::vector<Bucket> new_buckets;
+
+ // Binary search range of the sorted bucket array.
+ const Bucket* sorted_bucket_arr_begin = sorted_buckets_->array();
+ const Bucket* sorted_bucket_arr_end =
+ sorted_buckets_->array() + sorted_buckets_->num_elements();
+
+ // Step 1: handle keys belonging to buckets in the sorted bucket array. Skip
+ // keys belonging to the unsorted bucket array and deal with them in
+ // the next step.
+ // - Iterate through new_keys by it_start.
+  // - Binary search (std::lower_bound comparing key with bucket.key_upper())
+  //   to find the first bucket in the sorted bucket array whose key_upper is
+  //   not smaller than (>=) the key.
+ // - Skip (and advance it_start) all keys smaller than the target bucket's
+ // key_lower. It means these keys belong to buckets in the unsorted bucket
+ // array and we will deal with them later.
+ // - Find it_end such that all keys within range [it_start, it_end) belong to
+ // the target bucket.
+ // - Batch add keys within range [it_start, it_end) into the target bucket.
+ auto it_start = new_keys.cbegin();
+ while (it_start != new_keys.cend() &&
+ sorted_bucket_arr_begin < sorted_bucket_arr_end) {
+ // Use std::lower_bound to find the first bucket in the sorted bucket array
+ // with key_upper >= *it_start.
+ const Bucket* target_bucket = std::lower_bound(
+ sorted_bucket_arr_begin, sorted_bucket_arr_end, *it_start, kComparator);
+ if (target_bucket >= sorted_bucket_arr_end) {
+ // Keys in range [it_start, new_keys.cend()) are greater than all sorted
+ // buckets' key_upper, so we can end step 1. In fact, they belong to
+ // buckets in the unsorted bucket array and we will deal with them in
+ // step 2.
+ break;
+ }
+
+    // Advance it_start and it_end by sequential search instead of binary
+    // search, for several reasons:
+    // - Eventually we have to iterate through all keys within range [it_start,
+    //   it_end) and add them into the posting list, so binary search doesn't
+    //   improve the overall time complexity.
+    // - Binary search may jump to far-away indices, which potentially
+    //   degrades the cache performance.
+
+    // After binary search, we've ensured *it_start <=
+    // target_bucket->key_upper(), but it is still possible that *it_start (and
+    // the next several keys) is smaller than target_bucket->key_lower(), so we
+    // have to skip them. In fact, they belong to buckets in the unsorted
+    // bucket array.
+    //
+    // For example:
+    // - sorted bucket array: [(INT64_MIN, 0), (1, 5), (100, 300), (301, 550)]
+    // - unsorted bucket array: [(551, INT64_MAX), (6, 99)]
+    // - new_keys: [10, 20, 40, 102, 150, 200, 500, 600]
+    // std::lower_bound (target = 10) will get target_bucket = (100, 300), but
+    // we have to skip 10, 20, 40 because they are smaller than 100 (the
+    // bucket's key_lower). We should move it_start to point to key 102.
+ while (it_start != new_keys.cend() &&
+ *it_start < target_bucket->key_lower()) {
+ ++it_start;
+ }
+
+ // Locate it_end such that all keys within range [it_start, it_end) belong
+ // to target_bucket and all keys outside this range don't belong to
+ // target_bucket.
+ //
+    // For example (continuing from above), we should locate it_end to point
+    // to key 500.
+ auto it_end = it_start;
+ while (it_end != new_keys.cend() && *it_end <= target_bucket->key_upper()) {
+ ++it_end;
+ }
+
+ // Now, keys within range [it_start, it_end) belong to target_bucket, so
+ // construct IntegerIndexData and add them into the bucket's posting list.
+ if (it_start != it_end) {
+ ICING_ASSIGN_OR_RETURN(
+ FileBackedVector<Bucket>::MutableView mutable_bucket,
+ sorted_buckets_->GetMutable(target_bucket -
+ sorted_buckets_->array()));
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<Bucket> round_new_buckets,
+ AddKeysIntoBucketAndSplitIfNecessary(
+ document_id, section_id, it_start, it_end, mutable_bucket));
+ new_buckets.insert(new_buckets.end(), round_new_buckets.begin(),
+ round_new_buckets.end());
+ }
+
+ it_start = it_end;
+ sorted_bucket_arr_begin = target_bucket + 1;
+ }
+
+ // Step 2: handle keys belonging to buckets in the unsorted bucket array. They
+ // were skipped in step 1.
+ // For each bucket in the unsorted bucket array, find [it_start, it_end) such
+ // that all keys within this range belong to the bucket and add them.
+ // - Binary search (std::lower_bound comparing bucket.key_lower() with key) to
+ // find it_start.
+ // - Sequential advance (start from it_start) to find it_end. Same reason as
+ // above for choosing sequential advance instead of binary search.
+ // - Add keys within range [it_start, it_end) into the bucket.
+ for (int32_t i = 0; i < unsorted_buckets_->num_elements(); ++i) {
+ ICING_ASSIGN_OR_RETURN(FileBackedVector<Bucket>::MutableView mutable_bucket,
+ unsorted_buckets_->GetMutable(i));
+ auto it_start = std::lower_bound(new_keys.cbegin(), new_keys.cend(),
+ mutable_bucket.Get().key_lower());
+ if (it_start == new_keys.cend()) {
+ continue;
+ }
+
+ // Sequential advance instead of binary search to find the correct position
+ // of it_end for the same reasons mentioned above in step 1.
+ auto it_end = it_start;
+ while (it_end != new_keys.cend() &&
+ *it_end <= mutable_bucket.Get().key_upper()) {
+ ++it_end;
+ }
+
+    // Now, keys within range [it_start, it_end) belong to the bucket, so
+ // construct IntegerIndexData and add them into the bucket's posting list.
+ if (it_start != it_end) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<Bucket> round_new_buckets,
+ AddKeysIntoBucketAndSplitIfNecessary(
+ document_id, section_id, it_start, it_end, mutable_bucket));
+ new_buckets.insert(new_buckets.end(), round_new_buckets.begin(),
+ round_new_buckets.end());
+ }
+ }
+
+ // Step 3: append new buckets into the unsorted bucket array.
+ if (!new_buckets.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableArrayView mutable_new_arr,
+ unsorted_buckets_->Allocate(new_buckets.size()));
+ mutable_new_arr.SetArray(/*idx=*/0, new_buckets.data(), new_buckets.size());
+ }
+
+ // Step 4: sort and merge the unsorted bucket array into the sorted bucket
+ // array if the length of the unsorted bucket array exceeds the
+ // threshold.
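+  //         Keeping the unsorted bucket array short bounds the linear scans
+  //         over it in AddKeys and GetIterator while amortizing the sort cost.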
+ if (unsorted_buckets_->num_elements() > kUnsortedBucketsLengthThreshold) {
+ ICING_RETURN_IF_ERROR(SortBuckets());
+ }
+
+ info().num_data += new_keys.size();
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+IntegerIndexStorage::GetIterator(int64_t query_key_lower,
+ int64_t query_key_upper) const {
+ if (query_key_lower > query_key_upper) {
+ return absl_ports::InvalidArgumentError(
+ "key_lower should not be greater than key_upper");
+ }
+
+ std::vector<std::unique_ptr<BucketPostingListIterator>> bucket_pl_iters;
+
+ // Sorted bucket array
+ const Bucket* sorted_bucket_arr_begin = sorted_buckets_->array();
+ const Bucket* sorted_bucket_arr_end =
+ sorted_buckets_->array() + sorted_buckets_->num_elements();
+ for (const Bucket* bucket =
+ std::lower_bound(sorted_bucket_arr_begin, sorted_bucket_arr_end,
+ query_key_lower, kComparator);
+ bucket < sorted_bucket_arr_end && bucket->key_lower() <= query_key_upper;
+ ++bucket) {
+ if (!bucket->posting_list_identifier().is_valid()) {
+ continue;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket->posting_list_identifier()));
+ bucket_pl_iters.push_back(
+ std::make_unique<BucketPostingListIterator>(std::move(pl_accessor)));
+ }
+
+ // Unsorted bucket array
+ for (int32_t i = 0; i < unsorted_buckets_->num_elements(); ++i) {
+ ICING_ASSIGN_OR_RETURN(const Bucket* bucket, unsorted_buckets_->Get(i));
+ if (query_key_upper < bucket->key_lower() ||
+ query_key_lower > bucket->key_upper() ||
+ !bucket->posting_list_identifier().is_valid()) {
+      // Skip the bucket if its range doesn't overlap with [query_key_lower,
+      // query_key_upper] or its posting_list_identifier is invalid.
+ continue;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket->posting_list_identifier()));
+ bucket_pl_iters.push_back(
+ std::make_unique<BucketPostingListIterator>(std::move(pl_accessor)));
+ }
+
+ return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+ std::make_unique<IntegerIndexStorageIterator>(
+ query_key_lower, query_key_upper, std::move(bucket_pl_iters)));
+}
+
+libtextclassifier3::Status IntegerIndexStorage::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndexStorage* new_storage) const {
+ // Discard all pre-existing buckets in new_storage since we will append newly
+ // merged buckets gradually into new_storage.
+ if (new_storage->sorted_buckets_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(new_storage->sorted_buckets_->TruncateTo(0));
+ }
+ if (new_storage->unsorted_buckets_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(new_storage->unsorted_buckets_->TruncateTo(0));
+ }
+
+ // "Reference sort" the original storage buckets.
+ std::vector<std::reference_wrapper<const Bucket>> temp_buckets;
+ temp_buckets.reserve(sorted_buckets_->num_elements() +
+ unsorted_buckets_->num_elements());
+ temp_buckets.insert(
+ temp_buckets.end(), sorted_buckets_->array(),
+ sorted_buckets_->array() + sorted_buckets_->num_elements());
+ temp_buckets.insert(
+ temp_buckets.end(), unsorted_buckets_->array(),
+ unsorted_buckets_->array() + unsorted_buckets_->num_elements());
+ std::sort(temp_buckets.begin(), temp_buckets.end(),
+ [](const std::reference_wrapper<const Bucket>& lhs,
+ const std::reference_wrapper<const Bucket>& rhs) -> bool {
+ return lhs.get() < rhs.get();
+ });
+
+ const int32_t num_data_threshold_for_bucket_merge =
+ kNumDataThresholdRatioForBucketMerge *
+ new_storage->options_.num_data_threshold_for_bucket_split;
+ int64_t curr_key_lower = std::numeric_limits<int64_t>::min();
+ int64_t curr_key_upper = std::numeric_limits<int64_t>::min();
+ std::vector<IntegerIndexData> accumulated_data;
+ for (const std::reference_wrapper<const Bucket>& bucket_ref : temp_buckets) {
+ // Read all data from the bucket.
+ std::vector<IntegerIndexData> new_data;
+ if (bucket_ref.get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> old_pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ bucket_ref.get().posting_list_identifier()));
+
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch_old_data,
+ old_pl_accessor->GetNextDataBatch());
+ while (!batch_old_data.empty()) {
+ for (const IntegerIndexData& old_data : batch_old_data) {
+ DocumentId new_document_id =
+ old_data.basic_hit().document_id() < document_id_old_to_new.size()
+ ? document_id_old_to_new[old_data.basic_hit().document_id()]
+ : kInvalidDocumentId;
+ // Transfer the document id of the hit if the document is not deleted
+ // or outdated.
+ if (new_document_id != kInvalidDocumentId) {
+ new_data.push_back(
+ IntegerIndexData(old_data.basic_hit().section_id(),
+ new_document_id, old_data.key()));
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(batch_old_data,
+ old_pl_accessor->GetNextDataBatch());
+ }
+ }
+
+ // Decide whether:
+ // - Flush accumulated_data and create a new bucket for them.
+ // - OR merge new_data into accumulated_data and go to the next round.
+ if (!accumulated_data.empty() && accumulated_data.size() + new_data.size() >
+ num_data_threshold_for_bucket_merge) {
+ // TODO(b/259743562): [Optimization 3] adjust upper bound to fit more data
+ // from new_data to accumulated_data.
+ ICING_RETURN_IF_ERROR(FlushDataIntoNewSortedBucket(
+ curr_key_lower, curr_key_upper, std::move(accumulated_data),
+ new_storage));
+
+ curr_key_lower = bucket_ref.get().key_lower();
+ accumulated_data = std::move(new_data);
+ } else {
+      // We can just append to accumulated_data because
+      // FlushDataIntoNewSortedBucket will take care of sorting the contents.
+ std::move(new_data.begin(), new_data.end(),
+ std::back_inserter(accumulated_data));
+ }
+ curr_key_upper = bucket_ref.get().key_upper();
+ }
+
+  // Flush the last round of accumulated data into a bucket.
+ ICING_RETURN_IF_ERROR(
+ FlushDataIntoNewSortedBucket(curr_key_lower, curr_key_upper,
+ std::move(accumulated_data), new_storage));
+
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::InitializeNewFiles(
+ const Filesystem& filesystem, std::string&& working_path, Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ // IntegerIndexStorage uses working_path as working directory path.
+ // Create working directory.
+ if (!filesystem.CreateDirectory(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize sorted_buckets
+ int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetSortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize unsorted_buckets
+ pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetUnsortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize flash_index_storage
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer));
+
+ if (options.HasCustomInitBuckets()) {
+ // Insert custom init buckets.
+ std::sort(options.custom_init_sorted_buckets.begin(),
+ options.custom_init_sorted_buckets.end());
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableArrayView
+ mutable_new_sorted_bucket_arr,
+ sorted_buckets->Allocate(options.custom_init_sorted_buckets.size()));
+ mutable_new_sorted_bucket_arr.SetArray(
+ /*idx=*/0, options.custom_init_sorted_buckets.data(),
+ options.custom_init_sorted_buckets.size());
+
+ ICING_ASSIGN_OR_RETURN(typename FileBackedVector<Bucket>::MutableArrayView
+ mutable_new_unsorted_bucket_arr,
+ unsorted_buckets->Allocate(
+ options.custom_init_unsorted_buckets.size()));
+ mutable_new_unsorted_bucket_arr.SetArray(
+ /*idx=*/0, options.custom_init_unsorted_buckets.data(),
+ options.custom_init_unsorted_buckets.size());
+
+ // After inserting buckets, we can clear vectors since there is no need to
+ // cache them.
+ options.custom_init_sorted_buckets.clear();
+ options.custom_init_unsorted_buckets.clear();
+ } else {
+ // Insert one bucket with range [INT64_MIN, INT64_MAX].
+ ICING_RETURN_IF_ERROR(sorted_buckets->Append(Bucket(
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ }
+ ICING_RETURN_IF_ERROR(sorted_buckets->PersistToDisk());
+
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_integer_index_storage =
+ std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+ filesystem, std::move(working_path), std::move(options),
+ posting_list_serializer,
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(sorted_buckets), std::move(unsorted_buckets),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_integer_index_storage->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.num_data = 0;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_integer_index_storage->InitializeNewStorage());
+
+ return new_integer_index_storage;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path, Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ // Mmap the content of the crcs and info.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
+
+ // Initialize sorted_buckets
+ int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetSortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize unsorted_buckets
+ pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem, GetUnsortedBucketsFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<Bucket>::kMaxFileSize,
+ options.pre_mapping_fbv ? pre_mapping_mmap_size : 0));
+
+ // Initialize flash_index_storage
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer));
+
+ // Create instance.
+ auto integer_index_storage =
+ std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+ filesystem, std::move(working_path), std::move(options),
+ posting_list_serializer,
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(sorted_buckets), std::move(unsorted_buckets),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(integer_index_storage->InitializeExistingStorage());
+
+ // Validate other values of info and options.
+ // Magic should be consistent with the codebase.
+ if (integer_index_storage->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return integer_index_storage;
+}
+
+/* static */ libtextclassifier3::Status
+IntegerIndexStorage::FlushDataIntoNewSortedBucket(
+ int64_t key_lower, int64_t key_upper, std::vector<IntegerIndexData>&& data,
+ IntegerIndexStorage* storage) {
+ storage->SetDirty();
+
+ if (data.empty()) {
+ return storage->sorted_buckets_->Append(Bucket(
+ key_lower, key_upper, PostingListIdentifier::kInvalid, /*num_data=*/0));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(storage->flash_index_storage_.get(),
+ storage->posting_list_serializer_, data.begin(),
+ data.end()));
+
+ storage->info().num_data += data.size();
+ return storage->sorted_buckets_->Append(
+ Bucket(key_lower, key_upper, pl_id, data.size()));
+}
+
+libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
+ if (!flash_index_storage_->PersistToDisk()) {
+ return absl_ports::InternalError(
+        "Failed to persist FlashIndexStorage to disk");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndexStorage::PersistMetadataToDisk(
+ bool force) {
+ // We can skip persisting metadata to disk only if both info and storage are
+ // clean.
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndexStorage::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+IntegerIndexStorage::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ // Compute crcs
+ ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
+ sorted_buckets_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
+ unsorted_buckets_->ComputeChecksum());
+
+ // TODO(b/259744228): implement and include flash_index_storage checksum
+ return Crc32(sorted_buckets_crc.Get() ^ unsorted_buckets_crc.Get());
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexStorage::Bucket>>
+IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
+ DocumentId document_id, SectionId section_id,
+ const std::vector<int64_t>::const_iterator& it_start,
+ const std::vector<int64_t>::const_iterator& it_end,
+ FileBackedVector<Bucket>::MutableView& mutable_bucket) {
+ int32_t num_data_in_bucket = mutable_bucket.Get().num_data();
+ int32_t num_new_data = std::distance(it_start, it_end);
+ if (mutable_bucket.Get().key_lower() < mutable_bucket.Get().key_upper() &&
+ num_new_data + num_data_in_bucket >
+ options_.num_data_threshold_for_bucket_split) {
+ // Split bucket.
+
+ // 1. Read all data and free all posting lists.
+ std::vector<IntegerIndexData> all_data;
+ if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ mutable_bucket.Get().posting_list_identifier()));
+ ICING_ASSIGN_OR_RETURN(all_data, pl_accessor->GetAllDataAndFree());
+ }
+
+ // 2. Append all new data.
+ all_data.reserve(all_data.size() + num_new_data);
+ for (auto it = it_start; it != it_end; ++it) {
+ all_data.push_back(IntegerIndexData(section_id, document_id, *it));
+ }
+
+ // 3. Run bucket splitting algorithm to decide new buckets and dispatch
+ // data.
+ // - # of data in a full bucket =
+ // options_.num_data_threshold_for_bucket_split.
+    // - Bucket splitting logic will be invoked when adding new data
+    //   (num_new_data >= 1) into a full bucket.
+    // - In order to achieve good (amortized) time complexity, we want # of
+    //   data in new buckets to be around half_of_threshold (i.e.
+    //   options_.num_data_threshold_for_bucket_split / 2).
+    // - Using half_of_threshold as the cutoff threshold would split into
+    //   buckets with [half_of_threshold, half_of_threshold, num_new_data]
+    //   data, which is not ideal because num_new_data is usually small.
+    // - Thus, we pick (half_of_threshold + kNumDataAfterSplitAdjustment) as
+    //   the cutoff threshold to avoid over-splitting. It can tolerate
+    //   num_new_data up to (2 * kNumDataAfterSplitAdjustment) and split into
+    //   only 2 buckets (instead of 3) with
+    //   [half_of_threshold + kNumDataAfterSplitAdjustment,
+    //    half_of_threshold + (kNumDataAfterSplitAdjustment - num_new_data)].
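+    // Worked example (hypothetical numbers): with
+    // options_.num_data_threshold_for_bucket_split = 64 (the minimum) and
+    // kNumDataAfterSplitAdjustment = 5, cutoff_threshold = 64 / 2 + 5 = 37.
+    // Adding 1 new key to a full bucket of 64 data splits the 65 data into
+    // roughly [37, 28] instead of [32, 32, 1].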
+ int32_t cutoff_threshold =
+ options_.num_data_threshold_for_bucket_split / 2 +
+ kNumDataAfterSplitAdjustment;
+ std::vector<integer_index_bucket_util::DataRangeAndBucketInfo>
+ new_bucket_infos = integer_index_bucket_util::Split(
+ all_data, mutable_bucket.Get().key_lower(),
+ mutable_bucket.Get().key_upper(), cutoff_threshold);
+ if (new_bucket_infos.empty()) {
+ ICING_LOG(WARNING)
+ << "No buckets after splitting. This should not happen.";
+ return absl_ports::InternalError("Split error");
+ }
+
+ // 4. Flush data and create new buckets.
+ std::vector<Bucket> new_buckets;
+ for (int i = 0; i < new_bucket_infos.size(); ++i) {
+ int32_t num_data_in_new_bucket =
+ std::distance(new_bucket_infos[i].start, new_bucket_infos[i].end);
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(
+ flash_index_storage_.get(), posting_list_serializer_,
+ new_bucket_infos[i].start, new_bucket_infos[i].end));
+ if (i == 0) {
+ // Reuse mutable_bucket
+ mutable_bucket.Get().set_key_lower(new_bucket_infos[i].key_lower);
+ mutable_bucket.Get().set_key_upper(new_bucket_infos[i].key_upper);
+ mutable_bucket.Get().set_posting_list_identifier(pl_id);
+ mutable_bucket.Get().set_num_data(num_data_in_new_bucket);
+ } else {
+ new_buckets.push_back(Bucket(new_bucket_infos[i].key_lower,
+ new_bucket_infos[i].key_upper, pl_id,
+ num_data_in_new_bucket));
+ }
+ }
+
+ return new_buckets;
+ }
+
+  // Otherwise, we don't need to split the bucket. Simply add all new data
+  // into the bucket.
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor;
+ if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor, PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ mutable_bucket.Get().posting_list_identifier()));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor, PostingListIntegerIndexAccessor::Create(
+ flash_index_storage_.get(), posting_list_serializer_));
+ }
+
+ for (auto it = it_start; it != it_end; ++it) {
+ ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
+ IntegerIndexData(section_id, document_id, *it)));
+ }
+
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+    return absl_ports::InternalError(
+        "Failed to flush data into posting list(s)");
+ }
+
+ mutable_bucket.Get().set_posting_list_identifier(result.id);
+ // We've already verified num_new_data won't exceed the limit of the entire
+ // storage, so it is safe to add to the counter of the bucket.
+ mutable_bucket.Get().set_num_data(num_data_in_bucket + num_new_data);
+
+ return std::vector<Bucket>();
+}
+
+libtextclassifier3::Status IntegerIndexStorage::SortBuckets() {
+ if (unsorted_buckets_->num_elements() == 0) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ int32_t sorted_len = sorted_buckets_->num_elements();
+ int32_t unsorted_len = unsorted_buckets_->num_elements();
+ if (sorted_len > FileBackedVector<Bucket>::kMaxNumElements - unsorted_len) {
+ return absl_ports::OutOfRangeError(
+ "Sorted buckets length exceeds the limit after merging");
+ }
+
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Allocate(unsorted_len));
+
+ // Sort unsorted_buckets_.
+ ICING_RETURN_IF_ERROR(
+ unsorted_buckets_->Sort(/*begin_idx=*/0, /*end_idx=*/unsorted_len));
+
+  // Merge unsorted_buckets_ into sorted_buckets_ and clear unsorted_buckets_.
+  // Note that we could have used std::sort + std::inplace_merge, but dealing
+  // with the FileBackedVector SetDirty logic would be more complicated, so we
+  // implement our own merge with FileBackedVector methods.
+  //
+  // Merge buckets from the back. This saves some iterations and avoids
+  // setting dirty for unchanged elements of the original sorted segment.
+  // For example, we can avoid setting dirty for elements [1, 2, 3, 5] with
+  // the following sorted/unsorted data:
+  // - sorted: [1, 2, 3, 5, 8, 13, _, _, _, _]
+  // - unsorted: [6, 10, 14, 15]
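+  // Continuing the example above (sorted_len = 6, unsorted_len = 4): merging
+  // starts at sorted_write_idx = 9 and writes 15, 14, 13, 10, 8, 6 from the
+  // back, yielding [1, 2, 3, 5, 6, 8, 10, 13, 14, 15]; elements [1, 2, 3, 5]
+  // are never rewritten, so they stay clean.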
+ int32_t sorted_write_idx = sorted_len + unsorted_len - 1;
+ int32_t sorted_curr_idx = sorted_len - 1;
+ int32_t unsorted_curr_idx = unsorted_len - 1;
+ while (unsorted_curr_idx >= 0) {
+ if (sorted_curr_idx >= 0 && unsorted_buckets_->array()[unsorted_curr_idx] <
+ sorted_buckets_->array()[sorted_curr_idx]) {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Set(
+ sorted_write_idx, sorted_buckets_->array()[sorted_curr_idx]));
+      --sorted_curr_idx;
+    } else {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->Set(
+ sorted_write_idx, unsorted_buckets_->array()[unsorted_curr_idx]));
+ --unsorted_curr_idx;
+ }
+ --sorted_write_idx;
+ }
+
+ ICING_RETURN_IF_ERROR(unsorted_buckets_->TruncateTo(0));
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
new file mode 100644
index 0000000..0c1afbb
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage.h
@@ -0,0 +1,506 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_STORAGE_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_STORAGE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// IntegerIndexStorage: a class for indexing (persistent storage) and
+// searching contents of integer type sections in documents.
+// - Accepts new integer contents (a.k.a. keys) and adds records (BasicHit,
+//   key) into the integer index.
+// - Stores records (BasicHit, key) in posting lists and compresses them.
+// - Bucketizes these records by key to make range queries more efficient and
+//   manages them with the corresponding posting lists.
+// - When a posting list reaches the max size and is full, the
+//   PostingListAccessor mechanism creates another (max-size) posting list and
+//   chains them together.
+// - Storing all records in the same PL chain would be inefficient. E.g. a
+//   small range query would need to iterate through the whole PL chain while
+//   skipping many non-relevant records (whose keys don't belong to the query
+//   range).
+// - Therefore, we implement a splitting mechanism for full max-size posting
+//   lists: split the posting list, adjust the range of the original bucket,
+//   and add new buckets.
+// - Ranges of all buckets are disjoint, and their union is [INT64_MIN,
+//   INT64_MAX].
+// - Buckets should be sorted so we can binary search for the desired
+//   bucket(s). However, we may split a bucket into several buckets, and the
+//   cost of inserting newly created buckets is high.
+// - Thus, we introduce an unsorted bucket array for newly created buckets and
+//   merge unsorted buckets into the sorted bucket array only when the length
+//   of the unsorted bucket array exceeds a threshold. This reduces the # of
+//   merge events and amortizes the overall cost of bucket order maintenance.
+//   Note: some tree data structures (e.g. segment tree, B+ tree) maintain
+//   bucket order more efficiently than the sorted/unsorted bucket array
+//   mechanism, but the implementation is more complicated and doesn't improve
+//   performance much according to our analysis, so currently we choose the
+//   sorted/unsorted bucket array.
+// - We then binary search the sorted bucket array and sequentially search the
+//   unsorted bucket array.
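+//
+// Example usage (a minimal sketch; error handling elided, and `filesystem`,
+// `working_path`, and `serializer` are assumed to be created by the caller):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<IntegerIndexStorage> storage,
+//       IntegerIndexStorage::Create(
+//           filesystem, working_path,
+//           IntegerIndexStorage::Options(
+//               IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+//               /*pre_mapping_fbv_in=*/false),
+//           serializer.get()));
+//   ICING_RETURN_IF_ERROR(storage->AddKeys(/*document_id=*/0,
+//                                          /*section_id=*/1,
+//                                          /*new_keys=*/{10, 20, 30}));
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<DocHitInfoIterator> iter,
+//       storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/100));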
+class IntegerIndexStorage : public PersistentStorage {
+ public:
+ struct Info {
+ static constexpr int32_t kMagic = 0x6470e547;
+
+ int32_t magic;
+ int32_t num_data;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 8, "");
+
+ // Bucket
+ class Bucket {
+ public:
+    // Absolute max # of buckets allowed. Since the absolute max file size of
+    // FileBackedVector on a 32-bit platform is ~2^28, we can have at most
+    // ~13.4M buckets. To make it a power of 2, round it down to 2^23. Also,
+    // since we're using FileBackedVector to store buckets, add some
+    // static_asserts to ensure the numbers here are compatible with
+    // FileBackedVector.
+ static constexpr int32_t kMaxNumBuckets = 1 << 23;
+
+ explicit Bucket(int64_t key_lower, int64_t key_upper,
+ PostingListIdentifier posting_list_identifier =
+ PostingListIdentifier::kInvalid,
+ int32_t num_data = 0)
+ : key_lower_(key_lower),
+ key_upper_(key_upper),
+ posting_list_identifier_(posting_list_identifier),
+ num_data_(num_data) {}
+
+ bool operator<(const Bucket& other) const {
+ return key_lower_ < other.key_lower_;
+ }
+
+ // For FileBackedVector
+ bool operator==(const Bucket& other) const {
+ return key_lower_ == other.key_lower_ && key_upper_ == other.key_upper_ &&
+ posting_list_identifier_ == other.posting_list_identifier_;
+ }
+
+ int64_t key_lower() const { return key_lower_; }
+
+ int64_t key_upper() const { return key_upper_; }
+
+ void set_key_lower(int64_t key_lower) { key_lower_ = key_lower; }
+
+ void set_key_upper(int64_t key_upper) { key_upper_ = key_upper; }
+
+ PostingListIdentifier posting_list_identifier() const {
+ return posting_list_identifier_;
+ }
+ void set_posting_list_identifier(
+ PostingListIdentifier posting_list_identifier) {
+ posting_list_identifier_ = posting_list_identifier;
+ }
+
+ int32_t num_data() const { return num_data_; }
+ void set_num_data(int32_t num_data) { num_data_ = num_data; }
+
+ private:
+ int64_t key_lower_;
+ int64_t key_upper_;
+ PostingListIdentifier posting_list_identifier_;
+ int32_t num_data_;
+ } __attribute__((packed));
+ static_assert(sizeof(Bucket) == 24, "");
+ static_assert(sizeof(Bucket) == FileBackedVector<Bucket>::kElementTypeSize,
+ "Bucket type size is inconsistent with FileBackedVector "
+ "element type size");
+ static_assert(Bucket::kMaxNumBuckets <=
+ (FileBackedVector<Bucket>::kMaxFileSize -
+ FileBackedVector<Bucket>::Header::kHeaderSize) /
+ FileBackedVector<Bucket>::kElementTypeSize,
+ "Max # of buckets cannot fit into FileBackedVector");
+
+ struct Options {
+    // - According to the benchmark results, the more buckets there are, the
+    //   higher the latency for range queries. Therefore, this number should
+    //   not be too small, to avoid splitting buckets too aggressively.
+    // - We use `num_data_threshold_for_bucket_split / 2 + 5` as the cutoff
+    //   threshold after splitting. This number cannot be too small (e.g. 10),
+    //   because then a single bucket would have a similar # of data before
+    //   and after splitting, which defeats the purpose of splitting.
+    // - For convenience, let's set 64 as the minimum value.
+ static constexpr int32_t kMinNumDataThresholdForBucketSplit = 64;
+
+ explicit Options(int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+
+ explicit Options(std::vector<Bucket> custom_init_sorted_buckets_in,
+ std::vector<Bucket> custom_init_unsorted_buckets_in,
+ int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : custom_init_sorted_buckets(std::move(custom_init_sorted_buckets_in)),
+ custom_init_unsorted_buckets(
+ std::move(custom_init_unsorted_buckets_in)),
+ num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+
+ bool IsValid() const;
+
+ bool HasCustomInitBuckets() const {
+ return !custom_init_sorted_buckets.empty() ||
+ !custom_init_unsorted_buckets.empty();
+ }
+
+    // Custom buckets used when initializing new files. If both are empty,
+    // then the initial bucket is [INT64_MIN, INT64_MAX]. Usually we only set
+    // them in unit tests. Note that all buckets in custom_init_sorted_buckets
+    // and custom_init_unsorted_buckets should be disjoint and their union
+    // should be [INT64_MIN, INT64_MAX].
+ std::vector<Bucket> custom_init_sorted_buckets;
+ std::vector<Bucket> custom_init_unsorted_buckets;
+
+    // Threshold for invoking bucket splitting. If the # of data in a bucket
+    // exceeds this number after adding new data, then bucket splitting logic
+    // will be invoked.
+ //
+ // Note: num_data_threshold_for_bucket_split should be >=
+ // kMinNumDataThresholdForBucketSplit.
+ int32_t num_data_threshold_for_bucket_split;
+
+    // Flag indicating whether to memory-map the max possible file size for
+    // the underlying FileBackedVector before growing the actual file size.
+    bool pre_mapping_fbv;
+ };
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
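+  // I.e. bytes [0, sizeof(Crcs)) hold Crcs and bytes [sizeof(Crcs), 20) hold
+  // Info; together with sizeof(Info) == 8 above, this implies
+  // sizeof(Crcs) == 12.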
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "integer_index_storage";
+
+ // Default # of data threshold for bucket splitting during indexing (AddKeys).
+ // When # of data in a bucket reaches this number, we will try to split data
+ // into multiple buckets according to their keys.
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit = 65536;
+
+ // # of data threshold for bucket merging during optimization (TransferIndex)
+ // = kNumDataThresholdRatioForBucketMerge *
+ // options.num_data_threshold_for_bucket_split
+ //
+  // If the total # of data of adjacent buckets exceeds this threshold, then
+  // flush the accumulated data. Otherwise, merge the buckets and their data.
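+  // E.g. with the default split threshold (65536), the merge threshold is
+  // 0.7 * 65536 = ~45875 data.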
+ static constexpr double kNumDataThresholdRatioForBucketMerge = 0.7;
+
+  // Length threshold to sort and merge unsorted buckets into sorted buckets.
+  // If the length of unsorted_buckets exceeds the threshold, then call
+  // SortBuckets().
+  // TODO(b/259743562): decide whether to remove unsorted buckets, given that
+  // we changed the bucket splitting threshold and the # of buckets is small
+  // now.
+ static constexpr int32_t kUnsortedBucketsLengthThreshold = 5;
+
+ // Creates a new IntegerIndexStorage instance to index integers (for a single
+  // property). If any of the underlying files is missing, then delete the
+  // whole working_path and (re)initialize with new files. Otherwise,
+  // initialize and create the instance from existing files.
+ //
+ // filesystem: Object to make system level calls
+  // working_path: Specifies the working path for PersistentStorage.
+  //               IntegerIndexStorage uses the working path as its working
+  //               directory, and all related files will be stored under this
+  //               directory. It takes full ownership of working_path_,
+  //               including creation/deletion. It is the caller's
+  //               responsibility to specify a correct working path and to
+  //               avoid mixing different persistent storages together under
+  //               the same path. Also, the caller owns the parent directory
+  //               of working_path_ and is responsible for its
+  //               creation/deletion. See PersistentStorage for more details
+  //               about the concept of working_path.
+ // options: Options instance.
+ // posting_list_serializer: a PostingListIntegerIndexSerializer instance to
+ // serialize/deserialize integer index data to/from
+ // posting lists.
+ //
+ // Returns:
+ // - INVALID_ARGUMENT_ERROR if any value in options is invalid.
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // - INTERNAL_ERROR on I/O errors.
+ // - Any FileBackedVector/FlashIndexStorage errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ Options options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ // Deletes IntegerIndexStorage under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ // Delete copy and move constructor/assignment operator.
+ IntegerIndexStorage(const IntegerIndexStorage&) = delete;
+ IntegerIndexStorage& operator=(const IntegerIndexStorage&) = delete;
+
+ IntegerIndexStorage(IntegerIndexStorage&&) = delete;
+ IntegerIndexStorage& operator=(IntegerIndexStorage&&) = delete;
+
+ ~IntegerIndexStorage() override;
+
+ // Batch adds new keys (of the same DocumentId and SectionId) into the integer
+ // index storage.
+  // Note that since we separate different property names into different
+  // integer index storages, keys from multiple sections of a single document
+  // will never be added into the same integer index storage.
+ //
+ // Returns:
+ // - OK on success
+  //   - RESOURCE_EXHAUSTED_ERROR if # of integers in this storage exceeds
+  //     INT_MAX after adding new_keys
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::Status AddKeys(DocumentId document_id,
+ SectionId section_id,
+ std::vector<int64_t>&& new_keys);
+
+  // Returns a DocHitInfoIteratorNumeric<int64_t> (as the DocHitInfoIterator
+  // interface type) for iterating through all docs which have the specified
+  // (integer) property contents in range [query_key_lower, query_key_upper].
+  // When iterating through all relevant doc hits, it:
+  // - Merges multiple SectionIds of doc hits with the same DocumentId into a
+  //   single SectionIdMask and constructs DocHitInfo.
+  // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator(Numeric)
+ // - INVALID_ARGUMENT_ERROR if query_key_lower > query_key_upper
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ int64_t query_key_lower, int64_t query_key_upper) const;
+
+  // Transfers integer index data from the current storage to new_storage and
+  // optimizes buckets (for new_storage only), i.e. merges adjacent buckets if
+  // the total # of data among them is less than or equal to the bucket merge
+  // threshold (kNumDataThresholdRatioForBucketMerge *
+  // options.num_data_threshold_for_bucket_split).
+  //
+  // REQUIRES: new_storage should be a newly created storage instance, i.e. it
+  // should not contain any data. Otherwise, existing data and posting lists
+  // won't be freed and space will be wasted.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if sorted buckets length exceeds the limit after
+ // merging
+  //   - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndexStorage* new_storage) const;
+
+ int32_t num_data() const { return info().num_data; }
+
+ private:
+ static constexpr int8_t kNumDataAfterSplitAdjustment = 5;
+
+ explicit IntegerIndexStorage(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ std::unique_ptr<FlashIndexStorage> flash_index_storage)
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ options_(std::move(options)),
+ posting_list_serializer_(posting_list_serializer),
+ metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ sorted_buckets_(std::move(sorted_buckets)),
+ unsorted_buckets_(std::move(unsorted_buckets)),
+ flash_index_storage_(std::move(flash_index_storage)),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeNewFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+  // Flushes data into posting list(s), creates a new bucket with range
+  // [key_lower, key_upper], and appends it to storage's sorted buckets. It is
+  // a helper function for TransferIndex.
+ //
+ // Returns:
+ // - OK on success
+  //   - INTERNAL_ERROR if it fails to write existing data into posting
+  //     list(s)
+ // - Any FileBackedVector or PostingList errors
+ static libtextclassifier3::Status FlushDataIntoNewSortedBucket(
+ int64_t key_lower, int64_t key_upper,
+ std::vector<IntegerIndexData>&& data, IntegerIndexStorage* storage);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+  // Computes and returns the checksum of all storages. Checksums of
+  // sorted_buckets_ and unsorted_buckets_ will be combined together by XOR.
+ // TODO(b/259744228): implement and include flash_index_storage checksum
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ // Helper function to add keys in range [it_start, it_end) into the given
+ // bucket. It handles the bucket and its corresponding posting list(s) to make
+ // searching and indexing efficient.
+ //
+  // When the (single) posting list of the bucket is full:
+  // - If the posting list hasn't reached the max size, then simply add the
+  //   new key into it, and the PostingListAccessor mechanism will
+  //   automatically double the size of the posting list.
+  // - Else:
+  //   - If the bucket is splittable (i.e. key_lower < key_upper), then split
+  //     it into several new buckets with new ranges, and split the data
+  //     (according to their keys and the ranges of the new buckets) of the
+  //     original posting list into several new posting lists.
+  //   - Otherwise, simply add the new key into it, and the
+  //     PostingListAccessor mechanism will automatically create a new
+  //     max-size posting list and chain them.
+ //
+ // Returns:
+ // - On success: a vector of new Buckets (to add into the unsorted bucket
+ // array later)
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::StatusOr<std::vector<Bucket>>
+ AddKeysIntoBucketAndSplitIfNecessary(
+ DocumentId document_id, SectionId section_id,
+ const std::vector<int64_t>::const_iterator& it_start,
+ const std::vector<int64_t>::const_iterator& it_end,
+ FileBackedVector<Bucket>::MutableView& mutable_bucket);
+
+ // Merges all unsorted buckets into sorted buckets and clears unsorted
+ // buckets.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if sorted buckets length exceeds the limit after
+ // merging
+ // - Any FileBackedVector errors
+ libtextclassifier3::Status SortBuckets();
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+  // When the storage is dirty, we have to set the info dirty as well, so just
+  // expose SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ Options options_;
+
+ PostingListIntegerIndexSerializer* posting_list_serializer_; // Does not own.
+
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets_;
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_STORAGE_H_
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
new file mode 100644
index 0000000..85d381d
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -0,0 +1,407 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/numeric/normal-distribution-number-generator.h"
+#include "icing/testing/numeric/number-generator.h"
+#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
+#include "icing/testing/tmp-directory.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/numeric:integer-index-storage_benchmark
+//
+// $ blaze-bin/icing/index/numeric/integer-index-storage_benchmark
+// --benchmark_filter=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/numeric:integer-index-storage_benchmark
+//
+// $ adb push
+// blaze-bin/icing/index/numeric/integer-index-storage_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/integer-index-storage_benchmark
+// --benchmark_filter=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+static constexpr int32_t kNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
+static constexpr bool kPreMappingFbv = true;
+
+static constexpr SectionId kDefaultSectionId = 12;
+static constexpr int kDefaultSeed = 12345;
+
+enum DistributionTypeEnum {
+ kUniformDistribution,
+ kNormalDistribution,
+};
+
+class IntegerIndexStorageBenchmark {
+ public:
+ Filesystem filesystem;
+ std::string working_path;
+
+ PostingListIntegerIndexSerializer posting_list_serializer;
+
+ explicit IntegerIndexStorageBenchmark()
+ : working_path(GetTestTempDir() + "/integer_index_benchmark") {}
+
+ ~IntegerIndexStorageBenchmark() {
+ filesystem.DeleteDirectoryRecursively(working_path.c_str());
+ }
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<NumberGenerator<int64_t>>>
+CreateIntegerGenerator(DistributionTypeEnum distribution_type, int seed,
+ int num_keys) {
+ switch (distribution_type) {
+ case DistributionTypeEnum::kUniformDistribution:
+      // Since the collision # follows a Poisson distribution with lambda =
+      // (num_keys / range), we set the range to 10x num_keys (lambda = 0.1)
+      // to avoid too many collisions.
+      //
+      // Distribution:
+      // - keys in range picked 0 times: 90.5%
+      // - keys in range picked 1 time: 9%
+      // - keys in range picked 2 times: 0.45%
+      // - keys in range picked 3 times: 0.015%
+      //
+      // For example, num_keys = 1M, range = 10M. Then ~904837 keys will be
+      // picked exactly once, 45242 keys twice, and 1508 keys thrice ...
+ return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
+ seed, /*range_lower=*/0,
+ /*range_upper=*/static_cast<int64_t>(num_keys) * 10 - 1);
+ case DistributionTypeEnum::kNormalDistribution:
+ // Normal distribution with mean = 0 and stddev = num_keys / 1024.
+ // - keys in range [-1 * stddev, 1 * stddev]: 68.2%
+ // - keys in range [-2 * stddev, 2 * stddev]: 95.4%
+ // - keys in range [-3 * stddev, 3 * stddev]: 99.7%
+ //
+      // - When generating num_keys integers, 68.2% of them will be in the
+      //   range [-num_keys / 1024, num_keys / 1024].
+      // - Each number in this range will be sampled (num_keys * 0.682) /
+      //   ((num_keys / 1024) * 2) = 349 times on average and become a
+      //   "single-range bucket".
+ return std::make_unique<NormalDistributionNumberGenerator<int64_t>>(
+ seed, /*mean=*/0.0, /*stddev=*/num_keys / 1024.0);
+ default:
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+}
+
+void BM_Index(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::vector<int64_t> keys(num_keys);
+ for (int i = 0; i < num_keys; ++i) {
+ keys[i] = generator->Generate();
+ }
+
+ IntegerIndexStorageBenchmark benchmark;
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ state.ResumeTiming();
+
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i),
+ kDefaultSectionId, {keys[i]}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ state.PauseTiming();
+ storage.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_Index)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_BatchIndex(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::vector<int64_t> keys(num_keys);
+ for (int i = 0; i < num_keys; ++i) {
+ keys[i] = generator->Generate();
+ }
+
+ IntegerIndexStorageBenchmark benchmark;
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ std::vector<int64_t> keys_copy(keys);
+ state.ResumeTiming();
+
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(0),
+ kDefaultSectionId, std::move(keys_copy)));
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ state.PauseTiming();
+ storage.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_BatchIndex)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_ExactQuery(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ IntegerIndexStorageBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ std::unordered_map<int64_t, std::vector<DocumentId>> keys;
+ for (int i = 0; i < num_keys; ++i) {
+ int64_t key = generator->Generate();
+ keys[key].push_back(static_cast<DocumentId>(i));
+ ICING_ASSERT_OK(
+ storage->AddKeys(static_cast<DocumentId>(i), kDefaultSectionId, {key}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ for (auto _ : state) {
+ int64_t exact_query_key = generator->Generate();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ storage->GetIterator(/*query_key_lower=*/exact_query_key,
+ /*query_key_upper=*/exact_query_key));
+ std::vector<DocHitInfo> data;
+ while (iterator->Advance().ok()) {
+ data.push_back(iterator->doc_hit_info());
+ }
+
+ state.PauseTiming();
+ const auto it = keys.find(exact_query_key);
+ if (it == keys.end()) {
+ ASSERT_THAT(data, IsEmpty());
+ } else {
+ ASSERT_THAT(data, SizeIs(it->second.size()));
+ std::reverse(data.begin(), data.end());
+ for (int i = 0; i < data.size(); ++i) {
+ ASSERT_THAT(data[i].document_id(), Eq(it->second[i]));
+ ASSERT_THAT(data[i].hit_section_ids_mask(), Eq(1 << kDefaultSectionId));
+ }
+ }
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_ExactQuery)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+void BM_RangeQueryAll(benchmark::State& state) {
+ DistributionTypeEnum distribution_type =
+ static_cast<DistributionTypeEnum>(state.range(0));
+ int num_keys = state.range(1);
+
+ IntegerIndexStorageBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(
+ benchmark.working_path.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumberGenerator<int64_t>> generator,
+ CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i),
+ kDefaultSectionId,
+ {generator->Generate()}));
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ for (auto _ : state) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iterator,
+ storage->GetIterator(
+ /*query_key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*query_key_upper=*/std::numeric_limits<int64_t>::max()));
+ std::vector<DocHitInfo> data;
+ while (iterator->Advance().ok()) {
+ data.push_back(iterator->doc_hit_info());
+ }
+
+ ASSERT_THAT(data, SizeIs(num_keys));
+ }
+}
+BENCHMARK(BM_RangeQueryAll)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
new file mode 100644
index 0000000..a632bc8
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -0,0 +1,2161 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-storage.h"
+
+#include <unistd.h>
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Contains;
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Key;
+using ::testing::Le;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+
+using Bucket = IntegerIndexStorage::Bucket;
+using Crcs = PersistentStorage::Crcs;
+using Info = IntegerIndexStorage::Info;
+using Options = IntegerIndexStorage::Options;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+static constexpr DocumentId kDefaultDocumentId = 123;
+static constexpr SectionId kDefaultSectionId = 31;
+
+class IntegerIndexStorageTest : public ::testing::TestWithParam<bool> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/integer_index_storage_test";
+
+ serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
+ }
+
+ void TearDown() override {
+ serializer_.reset();
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+ std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
+};
+
+libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+ const IntegerIndexStorage* storage, int64_t key_lower, int64_t key_upper) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iter,
+ storage->GetIterator(key_lower, key_upper));
+ std::vector<DocHitInfo> hits;
+ while (iter->Advance().ok()) {
+ hits.push_back(iter->doc_hit_info());
+ }
+ return hits;
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsEmptyCustomInitBucketsShouldBeValid) {
+ EXPECT_THAT(
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsTrue());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidNumDataThresholdForBucketSplit) {
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/-1,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/0,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/63,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) {
+ // Invalid custom init sorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(10, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Invalid custom init unsorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(10, std::numeric_limits<int64_t>::max())},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest,
+ OptionsInvalidCustomInitBucketsPostingListIdentifier) {
+  // Custom init buckets must have an invalid posting list identifier, so
+  // options containing a bucket with a valid identifier should be invalid.
+ PostingListIdentifier valid_posting_list_identifier(0, 0, 0);
+ ASSERT_THAT(valid_posting_list_identifier.is_valid(), IsTrue());
+
+ // Invalid custom init sorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ valid_posting_list_identifier)},
+ /*custom_init_unsorted_buckets_in=*/{},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Invalid custom init unsorted bucket
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ valid_posting_list_identifier)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) {
+ // sorted buckets overlap
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-100, std::numeric_limits<int64_t>::max())},
+ /*custom_init_unsorted_buckets_in=*/{},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // unsorted buckets overlap
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(-100, std::numeric_limits<int64_t>::max()),
+ Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Cross buckets overlap
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(200, std::numeric_limits<int64_t>::max()), Bucket(0, 50),
+ Bucket(51, 199)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) {
+ // Missing INT64_MAX
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/{Bucket(1, 1000)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Missing INT64_MIN
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(-200, -100), Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(1, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+
+ // Missing some intermediate ranges
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(1, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+}
+
+TEST_P(IntegerIndexStorageTest, InvalidWorkingPath) {
+ EXPECT_THAT(
+ IntegerIndexStorage::Create(
+ filesystem_, "/dev/null/integer_index_storage_test",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) {
+ Options invalid_options(
+ /*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(-100, std::numeric_limits<int64_t>::max()),
+ Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam());
+ ASSERT_THAT(invalid_options.IsValid(), IsFalse());
+
+ EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_,
+ invalid_options, serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(IntegerIndexStorageTest, InitializeNewFiles) {
+ {
+ // Create new integer index storage
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ // Check info section
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndexStorage::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
+ EXPECT_THAT(info.num_data, Eq(0));
+
+ // Check crcs section
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndexStorage::kCrcsMetadataFileOffset));
+  // sorted_buckets should contain exactly 1 element, so the storages crc
+  // value should be non-zero.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs.component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs.all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs.component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
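+
+// A sketch of the checksum hierarchy asserted above (hedged: storages_crc is
+// only checked to be non-zero here; the authoritative definitions live in
+// IntegerIndexStorage's metadata classes):
+//   info_crc     = Crc32(raw bytes of Info)
+//   storages_crc = combined crc over the bucket vectors and flash index
+//                  storage (non-zero here since sorted_buckets starts with
+//                  one initial bucket)
+//   all_crc      = Crc32(raw bytes of Crcs::ComponentCrcs)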
+
+TEST_P(IntegerIndexStorageTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert some data.
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage1,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert some data.
+ ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> doc_hit_info_vec,
+ Query(storage1.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(storage1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage2,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ Query(storage2.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAreArray(doc_hit_info_vec.begin(), doc_hit_info_vec.end())));
+}
+
+TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
+ std::vector<DocHitInfo> doc_hit_info_vec;
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Insert some data.
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
+ /*new_keys=*/{0, 100, -100}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, /*section_id=*/2,
+ /*new_keys=*/{3, -1000, 500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, /*section_id=*/15,
+ /*new_keys=*/{-6, 321, 98}));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_vec,
+ Query(storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ }
+
+ {
+    // The previous instance went out of scope and was destroyed. Although we
+    // didn't call PersistToDisk explicitly, the destructor should invoke it,
+    // so initializing another instance on the same files should succeed and
+    // return the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndexStorage::kCrcsMetadataFileOffset));
+
+ // Manually corrupt all_crc
+ crcs.all_crc += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted all_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid all crc"));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndexStorage::kInfoMetadataFileOffset));
+
+  // Modify info without updating its checksum, simulating corruption of the
+  // info section.
+ info.num_data += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index storage with info that doesn't match
+ // its checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid info crc"));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedSortedBucketsShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Corrupt sorted buckets manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ sorted_buckets->ComputeChecksum());
+ ICING_ASSERT_OK(sorted_buckets->Append(Bucket(
+ /*key_lower=*/0, /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ ICING_ASSERT_OK(sorted_buckets->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ sorted_buckets->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted sorted_buckets_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedUnsortedBucketsShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Corrupt unsorted buckets manually.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/sizeof(Bucket) * 100 +
+ FileBackedVector<Bucket>::Header::kHeaderSize));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ unsorted_buckets->ComputeChecksum());
+ ICING_ASSERT_OK(unsorted_buckets->Append(Bucket(
+ /*key_lower=*/0, /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ ICING_ASSERT_OK(unsorted_buckets->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ unsorted_buckets->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted unsorted_buckets_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+// TODO(b/259744228): add test for corrupted flash_index_storage
+
+TEST_P(IntegerIndexStorageTest, InvalidQuery) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
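+
+  // key_lower > key_upper is an invalid range, so GetIterator should reject
+  // it with INVALID_ARGUMENT.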
+ EXPECT_THAT(
+ storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(IntegerIndexStorageTest, AddKeysShouldUpdateNumData) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(-1000,-100), (200,300), (-99,-1)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-51, -500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{201, 209, -149}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(6));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ // Check sorted_buckets manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(5));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk1,
+ sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(sbk1->key_lower(), Eq(-1000));
+ EXPECT_THAT(sbk1->key_upper(), Eq(-100));
+ EXPECT_THAT(sbk1->num_data(), Eq(2));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk2,
+ sorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(sbk2->key_lower(), Eq(0));
+ EXPECT_THAT(sbk2->key_upper(), Eq(100));
+ EXPECT_THAT(sbk2->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk3,
+ sorted_buckets->Get(/*idx=*/2));
+ EXPECT_THAT(sbk3->key_lower(), Eq(150));
+ EXPECT_THAT(sbk3->key_upper(), Eq(199));
+ EXPECT_THAT(sbk3->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk4,
+ sorted_buckets->Get(/*idx=*/3));
+ EXPECT_THAT(sbk4->key_lower(), Eq(200));
+ EXPECT_THAT(sbk4->key_upper(), Eq(300));
+ EXPECT_THAT(sbk4->num_data(), Eq(3));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk5,
+ sorted_buckets->Get(/*idx=*/4));
+ EXPECT_THAT(sbk5->key_lower(), Eq(301));
+ EXPECT_THAT(sbk5->key_upper(), Eq(999));
+ EXPECT_THAT(sbk5->num_data(), Eq(0));
+
+  // Check unsorted_buckets manually.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(unsorted_buckets->num_elements(), Eq(4));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk1,
+ unsorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(ubk1->key_lower(), Eq(1000));
+ EXPECT_THAT(ubk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(ubk1->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk2,
+ unsorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(ubk2->key_lower(), Eq(-99));
+ EXPECT_THAT(ubk2->key_upper(), Eq(-1));
+ EXPECT_THAT(ubk2->num_data(), Eq(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk3,
+ unsorted_buckets->Get(/*idx=*/2));
+ EXPECT_THAT(ubk3->key_lower(), Eq(101));
+ EXPECT_THAT(ubk3->key_upper(), Eq(149));
+ EXPECT_THAT(ubk3->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk4,
+ unsorted_buckets->Get(/*idx=*/3));
+ EXPECT_THAT(ubk4->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(ubk4->key_upper(), Eq(-1001));
+ EXPECT_THAT(ubk4->num_data(), Eq(0));
+}
+
+TEST_P(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key in each sorted bucket should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-500, /*key_upper=*/-500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/208, /*key_upper=*/208),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-200, /*key_upper=*/-200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-1000),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/300, /*key_upper=*/300),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key in each unsorted bucket should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1500, /*key_upper=*/-1500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/2000, /*key_upper=*/2000),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, ExactQueryIdenticalKeys) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(0,100), (1000,INT64_MAX)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{20}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{20}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(4));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key with multiple hits should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/20, /*key_upper=*/20),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) {
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on each sorted bucket boundary should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-100),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/0, /*key_upper=*/100),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/150, /*key_upper=*/199),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/200, /*key_upper=*/300),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/301, /*key_upper=*/999),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(5));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on each unsorted bucket boundary should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1000,
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-99, /*key_upper=*/-1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/101, /*key_upper=*/149),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/-1001),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted bucket (0,100).
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{43}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{30}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(2));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on partial range of each sorted bucket should get the correct
+ // result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/31, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/31),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/3, /*key_upper=*/5),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets (-99,-1).
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-19}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-72}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(2));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on partial range of each unsorted bucket should get the correct
+ // result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/149),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-38, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/-38),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-95, /*key_upper=*/-92),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, RangeQueryMultipleBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(-1000,-100), (200,300), (1000,INT64_MAX),
+ // (INT64_MIN,-1001)]
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/6, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/7, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/9, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query should get the correct result.
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-199,
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/-200),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, BatchAdd) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Batch add the following keys (including some edge cases) to test the
+ // correctness of the sort and binary search logic in AddKeys().
+ // clang-format off
+ std::vector<int64_t> keys = {4000, 3000, 2000, 300, 201, 200, 106, 104,
+ 100, 3, 2, 1, 0, -97, -98, -99,
+ -100, -200, -1000, -1001, -1500, -2000,
+ std::numeric_limits<int64_t>::max(),
+ std::numeric_limits<int64_t>::min()};
+ // clang-format on
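+  // The edge cases include custom bucket boundary values (0, 100, 200, 300,
+  // -99, -100, -1000, -1001) plus INT64_MIN and INT64_MAX.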
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ std::vector<int64_t>(keys)),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(keys.size()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ for (int64_t key : keys) {
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ std::vector<int64_t> keys = {2, 3, 1, 2, 4, -1, -1, 100, 3};
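+  // 2, -1, and 3 each appear twice above, so only 6 unique keys should be
+  // indexed.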
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, std::move(keys)),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(6));
+}
+
+TEST_P(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys with same document id and section id.
+ EXPECT_THAT(
+ storage->AddKeys(
+ /*document_id=*/0, kDefaultSectionId, /*new_keys=*/
+ {-500, 1024, -200, 208, std::numeric_limits<int64_t>::max(), -1000,
+ 300, std::numeric_limits<int64_t>::min(), -1500, 2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest,
+ MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys with same document id but different section ids.
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/63,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/62,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/61,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/60,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, /*section_id=*/59,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/58,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/57,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, /*section_id=*/56,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/55,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/54,
+ /*new_keys=*/{2000}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(10));
+
+ std::vector<SectionId> expected_sections = {63, 62, 61, 60, 59,
+ 58, 57, 56, 55, 54};
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, IteratorCallStatsMultipleBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}));
+ ASSERT_THAT(storage->num_data(), Eq(5));
+
+  // GetIterator for range [INT64_MIN, INT64_MAX] and Advance all. Those 5
+  // keys are in 2 buckets, so we will be inspecting 2 posting lists in 2
+  // blocks.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter1,
+ storage->GetIterator(/*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ while (iter1->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(
+ iter1->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/5,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/2));
+
+ // GetIterator for range [-1000, -100] and Advance all. Since we only have to
+ // read bucket (-1000,-100), there will be 3 advance calls and 1 block
+ // inspected.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter2,
+ storage->GetIterator(/*key_lower=*/-1000, /*key_upper=*/-100));
+ while (iter2->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(
+ iter2->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/3,
+ /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
+}
+
+TEST_P(IntegerIndexStorageTest, IteratorCallStatsSingleBucketChainedBlocks) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ int32_t num_keys_to_add = 800;
+ ASSERT_THAT(num_keys_to_add,
+ Lt(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit));
+ for (int i = 0; i < num_keys_to_add; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/i, kDefaultSectionId,
+ /*new_keys=*/{i}));
+ }
+
+  // Those 800 keys are in a single bucket with 3 chained posting lists, so we
+ // will be inspecting 3 blocks.
+ int32_t expected_num_blocks_inspected = 3;
+
+  // GetIterator for range [INT64_MIN, INT64_MAX] and Advance all.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter1,
+ storage->GetIterator(/*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ while (iter1->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(iter1->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/num_keys_to_add,
+ /*num_leaf_advance_calls_no_index=*/0,
+ expected_num_blocks_inspected));
+
+  // GetIterator for range [1, 1] and Advance all. Although only 1 hit is
+  // relevant, we still have to inspect the entire bucket and its posting
+  // list chain (which contains 3 blocks and 800 hits).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter2,
+ storage->GetIterator(/*key_lower=*/1, /*key_upper=*/1));
+ while (iter2->Advance().ok()) {
+ // Advance all hits.
+ }
+ EXPECT_THAT(iter2->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/num_keys_to_add,
+ /*num_leaf_advance_calls_no_index=*/0,
+ expected_num_blocks_inspected));
+}
+
+TEST_P(IntegerIndexStorageTest, SplitBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add custom_num_data_threshold_for_bucket_split + 1 keys to invoke bucket
+ // splitting.
+  // - Keys: custom_num_data_threshold_for_bucket_split down to 0
+  // - Document ids: 0 to custom_num_data_threshold_for_bucket_split
+ std::unordered_map<int64_t, DocumentId> data;
+ int64_t key = custom_num_data_threshold_for_bucket_split;
+ DocumentId document_id = 0;
+ for (int i = 0; i < custom_num_data_threshold_for_bucket_split + 1; ++i) {
+ data[key] = document_id;
+ ICING_ASSERT_OK(
+ storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
+ ++document_id;
+ --key;
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ // Manually check sorted and unsorted buckets.
+ {
+ // Check sorted buckets.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bucket1,
+ sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bucket1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bucket1->key_upper(), Ne(std::numeric_limits<int64_t>::max()));
+
+ int64_t sorted_bucket_key_upper = bucket1->key_upper();
+
+ // Check unsorted buckets.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(unsorted_buckets->num_elements(), Ge(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bucket2,
+ unsorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bucket2->key_lower(), Eq(sorted_bucket_key_upper + 1));
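+    // I.e. the initial [INT64_MIN, INT64_MAX] bucket was split: the sorted
+    // file keeps [INT64_MIN, k] and a new bucket starting at k + 1 was
+    // appended to the unsorted file.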
+ }
+
+ // Ensure that search works normally.
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ for (int64_t key = custom_num_data_threshold_for_bucket_split; key >= 0;
+ key--) {
+ ASSERT_THAT(data, Contains(Key(key)));
+ DocumentId expected_document_id = data[key];
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(expected_document_id,
+ expected_sections))));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+  // Add IntegerIndexStorage::kUnsortedBucketsLengthThreshold keys. For each
+  // key, add custom_num_data_threshold_for_bucket_split + 1 data entries. Then
+  // we will get:
+ // - Bucket splitting will create kUnsortedBucketsLengthThreshold + 1 unsorted
+ // buckets [[50, 50], [49, 49], ..., [1, 1], [51, INT64_MAX]].
+ // - Since there are kUnsortedBucketsLengthThreshold + 1 unsorted buckets, we
+ // should sort and merge buckets.
+ std::unordered_map<int64_t, std::vector<DocumentId>> data;
+ int64_t key = IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ DocumentId document_id = 0;
+ for (int i = 0; i < IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ ++i) {
+ for (int j = 0; j < custom_num_data_threshold_for_bucket_split + 1; ++j) {
+ data[key].push_back(document_id);
+ ICING_ASSERT_OK(
+ storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
+ ++document_id;
+ }
+ --key;
+ }
+ ICING_ASSERT_OK(storage->PersistToDisk());
+
+ // Manually check sorted and unsorted buckets.
+ {
+ // Check unsorted buckets.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(unsorted_buckets->num_elements(), Eq(0));
+
+ // Check sorted buckets.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Gt(1));
+ }
+
+ // Ensure that search works normally.
+ for (key = 1; key <= IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
+ ++key) {
+ ASSERT_THAT(data, Contains(Key(key)));
+
+ std::vector<DocHitInfo> expected_doc_hit_infos;
+ for (DocumentId doc_id : data[key]) {
+ expected_doc_hit_infos.push_back(DocHitInfo(
+ doc_id, /*hit_section_ids_mask=*/UINT64_C(1) << kDefaultSectionId));
+ }
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAreArray(expected_doc_hit_infos.rbegin(),
+ expected_doc_hit_infos.rend())));
+ }
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndex) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/13, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/21, kDefaultSectionId,
+ /*new_keys=*/{2048}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/34, kDefaultSectionId,
+ /*new_keys=*/{156}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/55, kDefaultSectionId,
+ /*new_keys=*/{20}));
+ ASSERT_THAT(storage->num_data(), Eq(9));
+
+ // Delete doc id = 5, 34, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(56, kInvalidDocumentId);
+ document_id_old_to_new[1] = 8;
+ document_id_old_to_new[2] = 3;
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 6;
+ document_id_old_to_new[21] = 1;
+ document_id_old_to_new[55] = 4;
+
+ // Transfer to new storage.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ ICING_ASSERT_OK(new_storage->PersistToDisk());
+ }
+
+ // Verify after transferring and reinitializing the instance.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(7));
+
+  // -500 had hits for old_docids 1 and 13, which are now 8 and 6 respectively.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-500, /*key_upper=*/-500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections))));
+
+ // 1024 had a hit for old_docid 2, which is now 3.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ // -200 had a hit for old_docid 3, which is now 0.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-200, /*key_upper=*/-200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+
+  // -60 had hits for old_docids 5 and 8; only old_docid 8 remains, now mapped
+  // to 2 (because doc 5 has been deleted).
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/-60, /*key_upper=*/-60),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+
+ // 2048 had a hit for old_docid 21, which is now 1.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/2048, /*key_upper=*/2048),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+
+  // 156 had a hit for old_docid 34, which is no longer found (because doc 34
+  // has been deleted).
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/156, /*key_upper=*/156),
+ IsOkAndHolds(IsEmpty()));
+
+ // 20 had a hit for old_docid 55, which is now 4.
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/20, /*key_upper=*/20),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{120}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-2000}));
+ ASSERT_THAT(storage->num_data(), Eq(2));
+
+ // Create document_id_old_to_new with size = 2. TransferIndex should handle
+ // out of range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId, 0};
+
+ // Transfer to new storage.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+
+ // Verify after transferring.
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(1));
+ EXPECT_THAT(Query(new_storage.get(), /*key_lower=*/120, /*key_upper=*/120),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(
+ Query(new_storage.get(), /*key_lower=*/-2000, /*key_upper=*/-2000),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferEmptyIndex) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ ASSERT_THAT(storage->num_data(), Eq(0));
+
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId, 0, 1,
+ kInvalidDocumentId, 2};
+
+ // Transfer to new storage.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+
+ // Verify after transferring.
+ EXPECT_THAT(new_storage->num_data(), Eq(0));
+ EXPECT_THAT(Query(new_storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/13, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ASSERT_THAT(storage->num_data(), Eq(6));
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+
+ // Transfer to new storage.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ ICING_ASSERT_OK(new_storage->PersistToDisk());
+ }
+
+ // Verify after transferring and reinitializing the instance.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(new_storage->num_data(), Eq(0));
+ EXPECT_THAT(Query(new_storage.get(),
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+ int32_t custom_num_data_threshold_for_bucket_merge =
+ IntegerIndexStorage::kNumDataThresholdRatioForBucketMerge *
+ custom_num_data_threshold_for_bucket_split;
+
+  // This test verifies that TransferIndex invokes the bucket merging logic, so
+  // that we avoid ending up with mostly empty buckets after many rounds of
+  // inserting and deleting data.
+
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{-60}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-500}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/6, kDefaultSectionId,
+ /*new_keys=*/{2048}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/7, kDefaultSectionId,
+ /*new_keys=*/{156}));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{20}));
+ ASSERT_THAT(storage->num_data(), Eq(9));
+ ASSERT_THAT(storage->num_data(),
+ Le(custom_num_data_threshold_for_bucket_merge));
+
+ // Create document_id_old_to_new that keeps all existing documents.
+ std::vector<DocumentId> document_id_old_to_new(9);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Transfer to new storage. It should result in 1 bucket: [INT64_MIN,
+ // INT64_MAX] after transferring.
+ const std::string new_storage_working_path = working_path_ + "_temp";
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, new_storage_working_path,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ EXPECT_THAT(new_storage->num_data(), Eq(9));
+ }
+
+ // Check new_storage->sorted_bucket_ manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ new_storage_working_path, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(bk1->num_data(), Eq(9));
+}
+
+TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+ int32_t custom_num_data_threshold_for_bucket_merge =
+ IntegerIndexStorage::kNumDataThresholdRatioForBucketMerge *
+ custom_num_data_threshold_for_bucket_split;
+
+  // This test verifies that TransferIndex invokes the bucket merging logic but
+  // doesn't merge buckets too aggressively, to ensure we won't end up with a
+  // bucket containing too much data.
+
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+  // Insert data into 2 buckets so that the total # of data in these 2 buckets
+  // exceeds custom_num_data_threshold_for_bucket_merge.
+ // - Bucket 1: [-1000, -100]
+ // - Bucket 2: [101, 149]
+ DocumentId document_id = 0;
+ int num_data_for_bucket1 = custom_num_data_threshold_for_bucket_merge - 50;
+ for (int i = 0; i < num_data_for_bucket1; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
+ /*new_keys=*/{-200}));
+ ++document_id;
+ }
+
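+  // Bucket 1 alone stays below custom_num_data_threshold_for_bucket_merge;
+  // adding bucket 2's 150 data entries pushes the combined total over it, so
+  // the merge should stop after bucket 1 and keep bucket 2 separate.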
+ int num_data_for_bucket2 = 150;
+ for (int i = 0; i < num_data_for_bucket2; ++i) {
+ ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
+ /*new_keys=*/{120}));
+ ++document_id;
+ }
+
+ ASSERT_THAT(storage->num_data(),
+ Eq(num_data_for_bucket1 + num_data_for_bucket2));
+ ASSERT_THAT(num_data_for_bucket1 + num_data_for_bucket2,
+ Gt(custom_num_data_threshold_for_bucket_merge));
+
+ // Create document_id_old_to_new that keeps all existing documents.
+ std::vector<DocumentId> document_id_old_to_new(document_id);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Transfer to new storage. This should result in 2 buckets: [INT64_MIN, 100]
+ // and [101, INT64_MAX]
+ const std::string new_storage_working_path = working_path_ + "_temp";
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ filesystem_, new_storage_working_path,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->TransferIndex(document_id_old_to_new, new_storage.get()),
+ IsOk());
+ EXPECT_THAT(new_storage->num_data(),
+ Eq(num_data_for_bucket1 + num_data_for_bucket2));
+ }
+
+ // Check new_storage->sorted_bucket_ manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ new_storage_working_path, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(2));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(bk1->key_upper(), Eq(100));
+ EXPECT_THAT(bk1->num_data(), Eq(num_data_for_bucket1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk2, sorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(bk2->key_lower(), Eq(101));
+ EXPECT_THAT(bk2->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(bk2->num_data(), Eq(num_data_for_bucket2));
+}
+
+INSTANTIATE_TEST_SUITE_P(IntegerIndexStorageTest, IntegerIndexStorageTest,
+ testing::Values(true, false));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
new file mode 100644
index 0000000..8c80698
--- /dev/null
+++ b/icing/index/numeric/integer-index.cc
@@ -0,0 +1,651 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to get the file name of metadata.
+std::string GetMetadataFileName() {
+ return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
+}
+
+// Helper function to get the file path of metadata according to the given
+// working directory.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
+}
+
+constexpr std::string_view kWildcardPropertyIndexFileName =
+ "wildcard_property_index";
+
+constexpr std::string_view kWildcardPropertyStorageFileName =
+ "wildcard_property_storage";
+
+std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/",
+ kWildcardPropertyStorageFileName);
+}
+
+// Helper function to get the sub working (directory) path of
+// IntegerIndexStorage according to the given working directory and property
+// path.
+std::string GetPropertyIndexStoragePath(std::string_view working_path,
+ std::string_view property_path) {
+ return absl_ports::StrCat(working_path, "/", property_path);
+}
+
+// Helper function to get all existing property paths by listing all
+// directories.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+GetAllExistingPropertyPaths(const Filesystem& filesystem,
+ const std::string& working_path) {
+ std::vector<std::string> property_paths;
+ std::unordered_set<std::string> excludes = {
+ GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
+ if (!filesystem.ListDirectory(working_path.c_str(), excludes,
+ /*recursive=*/false, &property_paths)) {
+ return absl_ports::InternalError("Failed to list directory");
+ }
+ return property_paths;
+}
+
+libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
+GetPropertyIntegerIndexStorageMap(
+ const Filesystem& filesystem, const std::string& working_path,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
+ ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
+ GetAllExistingPropertyPaths(filesystem, working_path));
+
+ IntegerIndex::PropertyToStorageMapType property_to_storage_map;
+ for (const std::string& property_path : property_paths) {
+ if (property_path == kWildcardPropertyIndexFileName) {
+ continue;
+ }
+ std::string storage_working_path =
+ GetPropertyIndexStoragePath(working_path, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem, storage_working_path,
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
+ pre_mapping_fbv),
+ posting_list_serializer));
+ property_to_storage_map.insert(
+ std::make_pair(property_path, std::move(storage)));
+ }
+
+ return property_to_storage_map;
+}
+
+// RETURNS:
+// - On success, an unordered_set representing the list of property paths
+// stored in the WildcardPropertyStorage managed by property_storage
+// - INTERNAL_ERROR on any failure to successfully read the underlying proto.
+libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
+ const FileBackedProto<WildcardPropertyStorage>& property_storage) {
+ std::unordered_set<std::string> wildcard_properties_set;
+ auto wildcard_properties_or = property_storage.Read();
+ if (!wildcard_properties_or.ok()) {
+ if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
+ return wildcard_properties_set;
+ }
+ return wildcard_properties_or.status();
+ }
+
+ const WildcardPropertyStorage* wildcard_properties =
+ wildcard_properties_or.ValueOrDie();
+ wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
+ for (const std::string& property : wildcard_properties->property_entries()) {
+ wildcard_properties_set.insert(property);
+ }
+ return wildcard_properties_set;
+}
+
+} // namespace
+
+libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
+ integer_index_.SetDirty();
+
+ auto iter = integer_index_.property_to_storage_map_.find(property_path_);
+ IntegerIndexStorage* target_storage = nullptr;
+ // 1. Check if this property already has its own individual index.
+ if (iter != integer_index_.property_to_storage_map_.end()) {
+ target_storage = iter->second.get();
+ // 2. Check if this property was added to wildcard storage.
+ } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
+ integer_index_.wildcard_properties_set_.end()) {
+ target_storage = integer_index_.wildcard_index_storage_.get();
+    // 3. Check if we've reached the limit of individual property storages.
+ } else if (integer_index_.property_to_storage_map_.size() >=
+ kMaxPropertyStorages) {
+ // 3a. Create the wildcard storage if it doesn't exist.
+ if (integer_index_.wildcard_index_storage_ == nullptr) {
+ ICING_ASSIGN_OR_RETURN(
+ integer_index_.wildcard_index_storage_,
+ IntegerIndexStorage::Create(
+ integer_index_.filesystem_,
+ GetPropertyIndexStoragePath(integer_index_.working_path_,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ integer_index_.posting_list_serializer_.get()));
+ }
+ ICING_RETURN_IF_ERROR(
+ integer_index_.AddPropertyToWildcardStorage(property_path_));
+ target_storage = integer_index_.wildcard_index_storage_.get();
+ // 4. Create a new individual storage for this new property.
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ integer_index_.filesystem_,
+ GetPropertyIndexStoragePath(integer_index_.working_path_,
+ property_path_),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ integer_index_.posting_list_serializer_.get()));
+ target_storage = new_storage.get();
+ integer_index_.property_to_storage_map_.insert(
+ std::make_pair(property_path_, std::move(new_storage)));
+ }
+
+ return target_storage->AddKeys(document_id_, section_id_,
+ std::move(seen_keys_));
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
+ // Discard working_path if metadata file is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ num_data_threshold_for_bucket_split,
+ pre_mapping_fbv);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ num_data_threshold_for_bucket_split,
+ pre_mapping_fbv);
+}
+
+IntegerIndex::~IntegerIndex() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist integer index to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
+ int64_t key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store,
+ int64_t current_time_ms) const {
+ std::string property_path_str(property_path);
+ auto iter = property_to_storage_map_.find(property_path_str);
+ if (iter != property_to_storage_map_.end()) {
+ return iter->second->GetIterator(key_lower, key_upper);
+ }
+
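+  // Hits in the wildcard storage are mixed across many properties, so wrap
+  // the delegate iterator with a restriction to the requested property path.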
+ if (wildcard_properties_set_.find(property_path_str) !=
+ wildcard_properties_set_.end()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ wildcard_index_storage_->GetIterator(key_lower, key_upper));
+ std::set<std::string> property_paths = {std::move(property_path_str)};
+ return DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(delegate), &document_store, &schema_store,
+ std::move(property_paths), current_time_ms);
+ }
+
+ // Return an empty iterator.
+ return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+ /*numeric_index_iter=*/nullptr);
+}
+
+libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
+ const std::string& property_path) {
+ SetDirty();
+
+ WildcardPropertyStorage wildcard_properties;
+ wildcard_properties.mutable_property_entries()->Reserve(
+ wildcard_properties_set_.size());
+ for (const std::string& property_path : wildcard_properties_set_) {
+ wildcard_properties.add_property_entries(property_path);
+ }
+ ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
+ std::make_unique<WildcardPropertyStorage>(
+ std::move(wildcard_properties))));
+
+ wildcard_properties_set_.insert(property_path);
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new integer index");
+ }
+
+ {
+ // Transfer all indexed data from current integer index to new integer
+ // index. Also PersistToDisk and destruct the instance after finishing, so
+ // we can safely swap directories later.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndex> new_integer_index,
+ Create(filesystem_, temp_working_path_ddir.dir(),
+ num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
+ ICING_RETURN_IF_ERROR(
+ TransferIndex(document_id_old_to_new, new_integer_index.get()));
+ new_integer_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
+ }
+
+ // Destruct current storage instances to safely swap directories.
+ metadata_mmapped_file_.reset();
+ property_to_storage_map_.clear();
+ wildcard_index_storage_.reset();
+ wildcard_property_storage_.reset();
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new integer index due to failed swap");
+ }
+
+ // Reinitialize the integer index.
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem_, metadata_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::InternalError(
+ "Invalid metadata file size after Optimize");
+ }
+ metadata_mmapped_file_ =
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
+
+ // Recreate all of the data structures tracking the wildcard storage.
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path_);
+ wildcard_property_storage_ =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem_, wildcard_property_path);
+
+ ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
+ CreatePropertySet(*wildcard_property_storage_));
+ if (!wildcard_properties_set_.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ wildcard_index_storage_,
+ IntegerIndexStorage::Create(
+ filesystem_,
+ GetPropertyIndexStoragePath(working_path_,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ posting_list_serializer_.get()));
+ }
+
+ // Initialize all existing integer index storages.
+ ICING_ASSIGN_OR_RETURN(
+ property_to_storage_map_,
+ GetPropertyIntegerIndexStorageMap(
+ filesystem_, working_path_, posting_list_serializer_.get(),
+ num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::Clear() {
+ SetDirty();
+
+ // Step 1: clear property_to_storage_map_.
+ property_to_storage_map_.clear();
+ wildcard_index_storage_.reset();
+
+ // Step 2: delete all IntegerIndexStorages. It is safe because there is no
+ // active IntegerIndexStorage after clearing the map.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::string> property_paths,
+ GetAllExistingPropertyPaths(filesystem_, working_path_));
+ for (const std::string& property_path : property_paths) {
+ ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
+ filesystem_,
+ GetPropertyIndexStoragePath(working_path_, property_path)));
+ }
+
+  // Step 3: Delete the wildcard property storage, if it exists. It is only an
+  // error if the file exists but cannot be deleted.
+  std::string wildcard_property_path =
+      GetWildcardPropertyStorageFilePath(working_path_);
+  if (filesystem_.FileExists(wildcard_property_path.c_str()) &&
+      !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to delete file at path ", wildcard_property_path));
+ }
+
+ info().last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path);
+ auto wildcard_property_storage =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem, wildcard_property_path);
+
+ // Create instance.
+ auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
+ filesystem, std::move(working_path),
+ std::make_unique<PostingListIntegerIndexSerializer>(),
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
+ /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
+
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_integer_index->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.last_added_document_id = kInvalidDocumentId;
+ info_ref.num_data_threshold_for_bucket_split =
+ num_data_threshold_for_bucket_split;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
+
+ return new_integer_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
+ // Mmap the content of the crcs and info.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
+
+ auto posting_list_serializer =
+ std::make_unique<PostingListIntegerIndexSerializer>();
+
+ // Initialize all existing integer index storages.
+ ICING_ASSIGN_OR_RETURN(
+ PropertyToStorageMapType property_to_storage_map,
+ GetPropertyIntegerIndexStorageMap(
+ filesystem, working_path, posting_list_serializer.get(),
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
+
+ std::string wildcard_property_path =
+ GetWildcardPropertyStorageFilePath(working_path);
+ auto wildcard_property_storage =
+ std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
+ filesystem, wildcard_property_path);
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unordered_set<std::string> wildcard_properties_set,
+ CreatePropertySet(*wildcard_property_storage));
+
+ std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
+ if (!wildcard_properties_set.empty()) {
+ ICING_ASSIGN_OR_RETURN(
+ wildcard_index_storage,
+ IntegerIndexStorage::Create(
+ filesystem,
+ GetPropertyIndexStoragePath(working_path,
+ kWildcardPropertyIndexFileName),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
+ pre_mapping_fbv),
+ posting_list_serializer.get()));
+ }
+
+ // Create instance.
+ auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
+ filesystem, std::move(working_path), std::move(posting_list_serializer),
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(property_to_storage_map), std::move(wildcard_property_storage),
+ std::move(wildcard_properties_set), std::move(wildcard_index_storage),
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (integer_index->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+  // If num_data_threshold_for_bucket_split mismatches, then return an error to
+  // let the caller rebuild.
+ if (integer_index->info().num_data_threshold_for_bucket_split !=
+ num_data_threshold_for_bucket_split) {
+ return absl_ports::FailedPreconditionError(
+ "Mismatch num_data_threshold_for_bucket_split");
+ }
+
+ return integer_index;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndex::TransferIntegerIndexStorage(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
+ IntegerIndex* new_integer_index) const {
+ std::string new_storage_working_path = GetPropertyIndexStoragePath(
+ new_integer_index->working_path_, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> new_storage,
+ IntegerIndexStorage::Create(
+ new_integer_index->filesystem_, new_storage_working_path,
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
+ new_integer_index->posting_list_serializer_.get()));
+
+ ICING_RETURN_IF_ERROR(
+ old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
+
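+  // If no data survived the document id translation, discard the new storage
+  // so that the new integer index doesn't keep an empty per-property
+  // directory around.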
+ if (new_storage->num_data() == 0) {
+ new_storage.reset();
+ ICING_RETURN_IF_ERROR(
+ IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
+ }
+ return new_storage;
+}
+
+libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const {
+ auto property_storage = std::make_unique<WildcardPropertyStorage>();
+ property_storage->mutable_property_entries()->Reserve(
+ wildcard_properties_set_.size());
+ for (const std::string& property : wildcard_properties_set_) {
+ property_storage->add_property_entries(property);
+ }
+
+ ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
+ std::move(property_storage)));
+ new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const {
+ // Transfer over the integer index storages
+ std::unique_ptr<IntegerIndexStorage> new_storage;
+ for (const auto& [property_path, old_storage] : property_to_storage_map_) {
+ ICING_ASSIGN_OR_RETURN(
+ new_storage,
+ TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
+ property_path, new_integer_index));
+ if (new_storage != nullptr) {
+ new_integer_index->property_to_storage_map_.insert(
+ {property_path, std::move(new_storage)});
+ }
+ }
+ if (wildcard_index_storage_ != nullptr) {
+ ICING_ASSIGN_OR_RETURN(
+ new_storage,
+ TransferIntegerIndexStorage(
+ document_id_old_to_new, wildcard_index_storage_.get(),
+ std::string(kWildcardPropertyIndexFileName), new_integer_index));
+ if (new_storage != nullptr) {
+ new_integer_index->wildcard_index_storage_ = std::move(new_storage);
+
+ // The only time we need to copy over the list of properties using
+ // wildcard storage is if wildcard_index_storage and new_storage are both
+ // non-null. Otherwise, the new wildcard index storage won't have any
+ // data.
+ ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
+ }
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ for (auto& [_, storage] : property_to_storage_map_) {
+ ICING_RETURN_IF_ERROR(storage->PersistToDisk());
+ }
+ // No need to persist wildcard_properties_storage_. All calls to
+ // FileBackedProto::Write are fully written through at the time of the call.
+ if (wildcard_index_storage_) {
+ ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk(bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ // XOR all crcs of all storages. Since XOR is commutative and associative,
+ // the order doesn't matter.
+ uint32_t storages_checksum = 0;
+ for (auto& [property_path, storage] : property_to_storage_map_) {
+ ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
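+    // Mix the property path into each storage's crc so that two properties
+    // with identical storage contents still contribute distinct values.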
+ storage_crc.Append(property_path);
+
+ storages_checksum ^= storage_crc.Get();
+ }
+
+ if (wildcard_index_storage_ != nullptr) {
+ ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
+ wildcard_index_storage_->UpdateChecksums());
+ storages_checksum ^= storage_crc.Get();
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
+ wildcard_property_storage_->ComputeChecksum());
+ storages_checksum ^= wildcard_properties_crc.Get();
+
+ return Crc32(storages_checksum);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
new file mode 100644
index 0000000..e7a3127
--- /dev/null
+++ b/icing/index/numeric/integer-index.h
@@ -0,0 +1,409 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-proto.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/index/numeric/wildcard-property-storage.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// IntegerIndex: a wrapper class for managing IntegerIndexStorage (a lower level
+// persistent storage class for indexing and searching contents of integer type
+// sections in documents) instances for different property paths.
+// We separate indexable integer data from different properties into different
+// storages, and IntegerIndex manages and handles indexable integer data
+// appropriately to their corresponding IntegerIndexStorage instance according
+// to the given property path.
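+//
+// A minimal usage sketch (the working path, property path, and ids below are
+// hypothetical):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<IntegerIndex> index,
+//       IntegerIndex::Create(
+//           filesystem, "/tmp/integer_index_demo",
+//           IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+//           /*pre_mapping_fbv=*/false));
+//   std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+//       index->Edit("price", /*document_id=*/0, /*section_id=*/0);
+//   ICING_RETURN_IF_ERROR(editor->BufferKey(42));
+//   ICING_RETURN_IF_ERROR(std::move(*editor).IndexAllBufferedKeys());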
+class IntegerIndex : public NumericIndex<int64_t> {
+ public:
+ using PropertyToStorageMapType =
+ std::unordered_map<std::string, std::unique_ptr<IntegerIndexStorage>>;
+
+ // Maximum number of individual property storages that this index will allow
+ // before falling back to placing hits for any new properties into the
+ // 'wildcard' storage.
+ static constexpr int kMaxPropertyStorages = 32;
+
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
+
+ struct Info {
+ static constexpr int32_t kMagic = 0x5d8a1e8a;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+ int32_t num_data_threshold_for_bucket_split;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 12, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 24, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "integer_index";
+
+  // Creates a new IntegerIndex instance to index integers. If any of the
+  // underlying files is missing, then delete the whole working_path and
+  // (re)initialize with new ones. Otherwise initialize and create the instance
+  // from the existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // IntegerIndex uses working path as working directory and all
+ // related files will be stored under this directory. See
+ // PersistentStorage for more details about the concept of
+ // working_path.
+ // num_data_threshold_for_bucket_split: see IntegerIndexStorage::Options for
+ // more details.
+  // pre_mapping_fbv: flag indicating whether to memory-map the max possible
+  //                  file size for the underlying FileBackedVector before
+  //                  growing the actual file size.
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // - INTERNAL_ERROR on I/O errors.
+ // - Any FileBackedVector/MemoryMappedFile errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
+ const Filesystem& filesystem, std::string working_path,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv);
+
+ // Deletes IntegerIndex under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ ~IntegerIndex() override;
+
+ // Returns an Editor instance for adding new records into integer index for a
+ // given property, DocumentId and SectionId. See Editor for more details.
+ std::unique_ptr<typename NumericIndex<int64_t>::Editor> Edit(
+ std::string_view property_path, DocumentId document_id,
+ SectionId section_id) override {
+ return std::make_unique<Editor>(property_path, document_id, section_id,
+ *this, num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_);
+ }
+
+ // Returns a DocHitInfoIterator for iterating through all docs which have the
+ // specified (integer) property contents in range [query_key_lower,
+ // query_key_upper].
+ // When iterating through all relevant doc hits, it:
+ // - Merges multiple SectionIds of doc hits with same DocumentId into a single
+ // SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator instance
+ // - NOT_FOUND_ERROR if the given property_path doesn't exist
+ // - Any IntegerIndexStorage errors
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ std::string_view property_path, int64_t key_lower, int64_t key_upper,
+ const DocumentStore& document_store, const SchemaStore& schema_store,
+ int64_t current_time_ms) const override;
+
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Integer index will convert all data (hits) to the new document
+ // ids and regenerate all index files. If all data in a property path are
+ // completely deleted, then the underlying storage will be discarded as well.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
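+  //   For example, {0, kInvalidDocumentId, 1} keeps old document 0 as 0,
+  //   deletes old document 1, and renumbers old document 2 to 1.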
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the integer index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on IO error
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+  // Clears all integer index data by discarding all existing storages, and
+  // sets last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear() override;
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ int num_property_indices() const override {
+ return property_to_storage_map_.size() +
+ ((wildcard_index_storage_ == nullptr) ? 0 : 1);
+ }
+
+ private:
+ class Editor : public NumericIndex<int64_t>::Editor {
+ public:
+ explicit Editor(std::string_view property_path, DocumentId document_id,
+ SectionId section_id, IntegerIndex& integer_index,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv)
+ : NumericIndex<int64_t>::Editor(property_path, document_id, section_id),
+ integer_index_(integer_index),
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
+ pre_mapping_fbv_(pre_mapping_fbv) {}
+
+ ~Editor() override = default;
+
+ libtextclassifier3::Status BufferKey(int64_t key) override {
+ seen_keys_.push_back(key);
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
+
+ private:
+    // Vector for caching all seen keys. Since IntegerIndexStorage::AddKeys
+    // sorts and dedupes keys, we can simply use a vector here and move it into
+    // AddKeys().
+ std::vector<int64_t> seen_keys_;
+
+ IntegerIndex& integer_index_; // Does not own.
+
+ int32_t num_data_threshold_for_bucket_split_;
+
+    // Flag indicating whether to memory-map the max possible file size for the
+    // underlying FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+ };
+
+ explicit IntegerIndex(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<PostingListIntegerIndexSerializer>
+ posting_list_serializer,
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ PropertyToStorageMapType&& property_to_storage_map,
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage,
+ std::unordered_set<std::string> wildcard_properties_set,
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv)
+ : NumericIndex<int64_t>(filesystem, std::move(working_path),
+ kWorkingPathType),
+ posting_list_serializer_(std::move(posting_list_serializer)),
+ metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ property_to_storage_map_(std::move(property_to_storage_map)),
+ wildcard_property_storage_(std::move(wildcard_property_storage)),
+ wildcard_properties_set_(std::move(wildcard_properties_set)),
+ wildcard_index_storage_(std::move(wildcard_index_storage)),
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv);
+
+ // Adds the property path to the list of properties using wildcard storage.
+ // This updates both the in-memory list (wildcard_properties_set_) and the
+ // persistent list (wildcard_property_storage_).
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in wildcard_property_storage_.
+ libtextclassifier3::Status AddPropertyToWildcardStorage(
+ const std::string& property_path);
+
+ // Transfers integer index data from the current integer index to
+ // new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const;
+
+ // Transfers integer index data from old_storage to new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ TransferIntegerIndexStorage(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
+ IntegerIndex* new_integer_index) const;
+
+ // Transfers the persistent and in-memory lists of properties using the
+ // wildcard storage from the current integer index to new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in new_integer_index.
+ libtextclassifier3::Status TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns the combined checksum of all storages. The checksums
+ // of (storage_crc, property_path) pairs for all existing property paths are
+ // combined together by XOR.
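+ //
+ // Illustrative pseudo-code only (not the actual implementation):
+ //   uint32_t total = 0;
+ //   for (const auto& [property_path, storage] : storages)
+ //     total ^= crc_of(property_path, storage_crc);
+ // XOR keeps the combined checksum independent of iteration order.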
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When the storage is dirty, the info must be marked dirty as well, so we
+ // expose a single SetDirty that sets both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;
+
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
+
+ // Property path to integer index storage map.
+ PropertyToStorageMapType property_to_storage_map_;
+
+ // Persistent list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage_;
+
+ // In-memory list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unordered_set<std::string> wildcard_properties_set_;
+
+ // The index storage used once kMaxPropertyStorages individual storages have
+ // already been created in property_to_storage_map_.
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_;
+
+ int32_t num_data_threshold_for_bucket_split_;
+
+ // Flag indicating whether to memory-map the maximum possible file size for
+ // the underlying FileBackedVector before growing the actual file size.
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
new file mode 100644
index 0000000..3b60001
--- /dev/null
+++ b/icing/index/numeric/integer-index_test.cc
@@ -0,0 +1,2598 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Lt;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = IntegerIndex::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+static constexpr std::string_view kDefaultTestPropertyPath = "test.property";
+
+constexpr SectionId kDefaultSectionId = 0;
+
+template <typename T>
+class NumericIndexIntegerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/numeric_index_integer_test";
+ std::string schema_dir = base_dir_ + "/schema_test";
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(schema_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_, SchemaStore::Create(&filesystem_, schema_dir, &clock_));
+
+ std::string document_store_dir = base_dir_ + "/doc_store_test";
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ doc_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ template <typename UnknownIntegerIndexType>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex() {
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex<DummyNumericIndex<int64_t>>() {
+ return DummyNumericIndex<int64_t>::Create(filesystem_, working_path_);
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
+ CreateIntegerIndex<IntegerIndex>() {
+ return IntegerIndex::Create(
+ filesystem_, working_path_, /*num_data_threshold_for_bucket_split=*/
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/false);
+ }
+
+ template <typename NotIntegerIndexType>
+ bool is_integer_index() const {
+ return false;
+ }
+
+ template <>
+ bool is_integer_index<IntegerIndex>() const {
+ return true;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocumentId>> CompactDocStore() {
+ std::string document_store_dir = base_dir_ + "/doc_store_test";
+ std::string document_store_compact_dir =
+ base_dir_ + "/doc_store_compact_test";
+ if (!filesystem_.CreateDirectoryRecursively(
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to create compact directory");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ DocumentStore::OptimizeResult doc_store_optimize_result,
+ doc_store_->OptimizeInto(document_store_compact_dir, nullptr));
+
+ doc_store_.reset();
+ if (!filesystem_.SwapFiles(document_store_dir.c_str(),
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to swap directories.");
+ }
+ if (!filesystem_.DeleteDirectoryRecursively(
+ document_store_compact_dir.c_str())) {
+ return absl_ports::InternalError("Unable to delete compact directory");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(doc_store_create_result.document_store);
+ return std::move(doc_store_optimize_result.document_id_old_to_new);
+ }
+
+ libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+ const NumericIndex<int64_t>* integer_index,
+ std::string_view property_path, int64_t key_lower, int64_t key_upper) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(property_path, key_lower, key_upper,
+ *doc_store_, *schema_store_,
+ clock_.GetSystemTimeMilliseconds()));
+
+ std::vector<DocHitInfo> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->doc_hit_info());
+ }
+ return result;
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+ Clock clock_;
+};
+
+void Index(NumericIndex<int64_t>* integer_index, std::string_view property_path,
+ DocumentId document_id, SectionId section_id,
+ std::vector<int64_t> keys) {
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ integer_index->Edit(property_path, document_id, section_id);
+
+ for (const auto& key : keys) {
+ ICING_EXPECT_OK(editor->BufferKey(key));
+ }
+ ICING_EXPECT_OK(std::move(*editor).IndexAllBufferedKeys());
+}
+
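+// Each typed test below runs against both the in-memory DummyNumericIndex and
+// the persistent IntegerIndex.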
+using TestTypes = ::testing::Types<DummyNumericIndex<int64_t>, IntegerIndex>;
+TYPED_TEST_SUITE(NumericIndexIntegerTest, TestTypes);
+
+TYPED_TEST(NumericIndexIntegerTest, SetLastAddedDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ constexpr DocumentId kDocumentId = 100;
+ integer_index->set_last_added_document_id(kDocumentId);
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 123;
+ integer_index->set_last_added_document_id(kNextDocumentId);
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TYPED_TEST(
+ NumericIndexIntegerTest,
+ SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ constexpr DocumentId kDocumentId = 123;
+ integer_index->set_last_added_document_id(kDocumentId);
+ ASSERT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 100;
+ ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+ integer_index->set_last_added_document_id(kNextDocumentId);
+ // last_added_document_id() should remain unchanged.
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, SingleKeyExactQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ int64_t query_key = 2;
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/query_key, /*key_upper=*/query_key),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, SingleKeyRangeQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, WildcardStorageQuery) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ // Section ids are assigned alphabetically within each type, so the property
+ // ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ std::string desired_property = "desiredProperty";
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ std::string undesired_property = "undesiredProperty";
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeB.anotherProperty
+ std::string another_property = "anotherProperty";
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+
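+ // The expected count differs by implementation: the persistent IntegerIndex
+ // caps individual property storages at kMaxPropertyStorages (32) and places
+ // the remaining three properties into the single shared wildcard storage
+ // (32 + 1 = 33), while the dummy index keeps one in-memory map per property
+ // path (32 + 3 = 35).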
+ if (this->template is_integer_index<TypeParam>()) {
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+ } else {
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(35));
+ }
+
+ // Only the hits for 'desired_prop_id' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+ EXPECT_THAT(
+ this->Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ this->Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, EmptyResult) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/10, /*key_upper=*/10),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ NonExistingPropertyPathShouldReturnEmptyResult) {
+ constexpr std::string_view kAnotherPropertyPath = "another_property";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+
+ EXPECT_THAT(this->Query(integer_index.get(), kAnotherPropertyPath,
+ /*key_lower=*/100, /*key_upper=*/200),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ MultipleKeysShouldMergeAndDedupeDocHitInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Construct several documents with multiple keys under the same section.
+ // Range query [1, 3] will find hits with the same (DocumentId, SectionId)
+ // multiple times. For example, (2, kDefaultSectionId) will be found twice
+ // (once for key = 1 and once for key = 3).
+ // Test if the iterator dedupes correctly.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{-1000, 0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{-100, 0, 1, 2, 3, 4, 5});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3, 1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{4, 1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{1, 6});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2, 100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/6,
+ kDefaultSectionId, /*keys=*/{1000, 2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/7,
+ kDefaultSectionId, /*keys=*/{4, -1000});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, EdgeNumericValues) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{-100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{-80});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::max()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::min()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{200});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/6,
+ kDefaultSectionId, /*keys=*/{100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/7,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::max()});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/9,
+ kDefaultSectionId, /*keys=*/{std::numeric_limits<int64_t>::min()});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+
+ // Negative key
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/-100, /*key_upper=*/-70),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+
+ // INT64_MAX key
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ // INT64_MIN key
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+
+ // Key = 0
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+
+ // All keys from INT64_MIN to INT64_MAX
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Construct several documents with multiple numeric sections.
+ // Range query [1, 3] will find hits with the same DocumentId but multiple
+ // different SectionIds. For example, there will be 2 hits (1, 0), (1, 1) for
+ // DocumentId=1.
+ // Test if the iterator merges multiple sections into a single SectionIdMask
+ // correctly.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/2, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/1, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/0, /*keys=*/{-1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/1, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/0, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/5, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/4, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/3, /*keys=*/{5});
+
+ EXPECT_THAT(
+ this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, std::vector<SectionId>{4, 5}),
+ EqualsDocHitInfo(/*document_id=*/1, std::vector<SectionId>{1, 2}),
+ EqualsDocHitInfo(/*document_id=*/0, std::vector<SectionId>{1}))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, NonRelevantPropertyShouldNotBeIncluded) {
+ constexpr std::string_view kNonRelevantProperty = "non_relevant_property";
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kNonRelevantProperty, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kNonRelevantProperty, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest,
+ RangeQueryKeyLowerGreaterThanKeyUpperShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{2});
+
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, Optimize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/13,
+ kDefaultSectionId, /*keys=*/{2});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // Verify index and query API still work normally after Optimize().
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/1, /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/0, /*key_upper=*/0),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/4, /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{123});
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/123, /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeMultiplePropertyPaths) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Doc id = 1: insert 2 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert 2 data for "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Doc id = 5: insert 3 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/5, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/5, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 8: insert 1 data for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/8, kSectionId2,
+ /*keys=*/{3});
+
+ // Doc id = 13: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/13, kSectionId1,
+ /*keys=*/{4});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // Verify index and query API still work normally after Optimize().
+ // Key = 1
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 2
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(IsEmpty()));
+
+ // key = 3
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 4
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeShouldDiscardEmptyPropertyStorage) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ // Doc id = 1: insert 2 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+ // Doc id = 3: insert 2 data for "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+ // Delete doc id = 1, 3, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(4, kInvalidDocumentId);
+ document_id_old_to_new[2] = 0;
+
+ DocumentId new_last_added_document_id = 0;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ // All data in "prop2" as well as the underlying storage should be deleted,
+ // so querying "prop2" should return an empty result.
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+ if (std::is_same_v<IntegerIndex, TypeParam>) {
+ std::string prop2_storage_working_path =
+ absl_ports::StrCat(this->working_path_, "/", kPropertyPath2);
+ EXPECT_THAT(
+ this->filesystem_.DirectoryExists(prop2_storage_working_path.c_str()),
+ IsFalse());
+ }
+
+ // Verify we can still index and query for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
+ /*keys=*/{123});
+ EXPECT_THAT(this->Query(integer_index.get(), kPropertyPath2,
+ /*key_lower=*/123, /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeOutOfRangeDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+
+ // Create document_id_old_to_new with size = 2. Optimize should handle
+ // out-of-range DocumentIds properly.
+ std::vector<DocumentId> document_id_old_to_new(2, kInvalidDocumentId);
+
+ EXPECT_THAT(integer_index->Optimize(
+ document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, OptimizeDeleteAll) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/5,
+ kDefaultSectionId, /*keys=*/{0});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/8,
+ kDefaultSectionId, /*keys=*/{4});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/13,
+ kDefaultSectionId, /*keys=*/{2});
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+
+ EXPECT_THAT(integer_index->Optimize(
+ document_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(this->Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TYPED_TEST(NumericIndexIntegerTest, Clear) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ this->template CreateIntegerIndex<TypeParam>());
+
+ Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), /*property_path=*/"B", /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ integer_index->set_last_added_document_id(1);
+
+ ASSERT_THAT(integer_index->last_added_document_id(), Eq(1));
+ ASSERT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kDefaultSectionId}))));
+ ASSERT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kDefaultSectionId}))));
+
+ // After resetting, last_added_document_id should be set to
+ // kInvalidDocumentId, and the previously added keys should be deleted.
+ ICING_ASSERT_OK(integer_index->Clear());
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(IsEmpty()));
+
+ // Integer index should be able to work normally after Clear().
+ Index(integer_index.get(), /*property_path=*/"A", /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{123});
+ Index(integer_index.get(), /*property_path=*/"B", /*document_id=*/4,
+ kDefaultSectionId, /*keys=*/{456});
+ integer_index->set_last_added_document_id(4);
+
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(4));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"A", /*key_lower=*/123,
+ /*key_upper=*/123),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kDefaultSectionId}))));
+ EXPECT_THAT(
+ this->Query(integer_index.get(), /*property_path=*/"B", /*key_lower=*/456,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/4, std::vector<SectionId>{kDefaultSectionId}))));
+}
+
+struct IntegerIndexTestParam {
+ int32_t num_data_threshold_for_bucket_split;
+ bool pre_mapping_fbv;
+
+ explicit IntegerIndexTestParam(int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+};
+
+// Tests for persistent integer index only
+class IntegerIndexTest
+ : public NumericIndexIntegerTest<IntegerIndex>,
+ public ::testing::WithParamInterface<IntegerIndexTestParam> {};
+
+TEST_P(IntegerIndexTest, InvalidWorkingPath) {
+ EXPECT_THAT(
+ IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test",
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(IntegerIndexTest, InitializeNewFiles) {
+ {
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
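+ // (Per the accessors in integer-index.h, Crcs live at
+ // kCrcsMetadataFileOffset and Info at kInfoMetadataFileOffset within this
+ // file.)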
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ // Check info section
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndex::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
+ EXPECT_THAT(info.last_added_document_id, Eq(kInvalidDocumentId));
+ EXPECT_THAT(info.num_data_threshold_for_bucket_split,
+ Eq(GetParam().num_data_threshold_for_bucket_split));
+
+ // Check crcs section
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndex::kCrcsMetadataFileOffset));
+ // There are no storages initially, so storages_crc should be 0.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Eq(0));
+ EXPECT_THAT(crcs.component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs.all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs.component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_P(IntegerIndexTest,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_P(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index1,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Insert some data.
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+ integer_index1->set_last_added_document_id(2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<DocHitInfo> doc_hit_info_vec,
+ Query(integer_index1.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(integer_index1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index2,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ EXPECT_THAT(integer_index2->last_added_document_id(), Eq(2));
+ EXPECT_THAT(Query(integer_index2.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+}
+
+TEST_P(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) {
+ std::vector<DocHitInfo> doc_hit_info_vec;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+ integer_index->set_last_added_document_id(2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ doc_hit_info_vec,
+ Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()));
+ }
+
+ {
+    // The previous instance went out of scope and was destroyed. Although we
+    // didn't call PersistToDisk explicitly, the destructor should invoke it,
+    // so initializing another instance on the same files should succeed and
+    // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ EXPECT_THAT(integer_index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+ doc_hit_info_vec.end())));
+ }
+}
+
+TEST_P(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ IntegerIndex::kCrcsMetadataFileOffset));
+
+ // Manually corrupt all_crc
+ crcs.all_crc += kCorruptedValueOffset;
+  ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+                                 IntegerIndex::kCrcsMetadataFileOffset,
+                                 &crcs, sizeof(Crcs)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index with metadata containing corrupted
+ // all_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid all crc"));
+ }
+}
+
+TEST_P(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/", IntegerIndex::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ IntegerIndex::kInfoMetadataFileOffset));
+
+  // Modify info without updating its checksum. This simulates corruption of
+  // the info section.
+ info.last_added_document_id += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndex::kInfoMetadataFileOffset, &info,
+ sizeof(Info)));
+ metadata_sfd.reset();
+
+ {
+ // Attempt to create the integer index with info that doesn't match its
+ // checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid info crc"));
+ }
+}
+
+TEST_P(IntegerIndexTest,
+ InitializeExistingFilesWithCorruptedStoragesShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ {
+ // Corrupt integer index storage for kDefaultTestPropertyPath manually.
+ PostingListIntegerIndexSerializer posting_list_integer_index_serializer;
+ std::string storage_working_path =
+ absl_ports::StrCat(working_path_, "/", kDefaultTestPropertyPath);
+ ASSERT_TRUE(filesystem_.DirectoryExists(storage_working_path.c_str()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, std::move(storage_working_path),
+ IntegerIndexStorage::Options(
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ &posting_list_integer_index_serializer));
+ ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, /*section_id=*/4,
+ /*new_keys=*/{3, 4, 5}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the integer index with corrupted storages. This should
+ // fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
+ }
+}
+
+TEST_P(
+ IntegerIndexTest,
+ InitializeExistingFilesWithMismatchNumDataThresholdForBucketSplitShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the integer index with different
+ // num_data_threshold_for_bucket_split. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or = IntegerIndex::Create(
+ filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split + 1,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Mismatch num_data_threshold_for_bucket_split"));
+ }
+}
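+
+// A sketch of the option check exercised above (assumed shape with
+// hypothetical names existing_info/options; the actual validation happens
+// inside IntegerIndex initialization):
+//
+//   if (existing_info.num_data_threshold_for_bucket_split !=
+//       options.num_data_threshold_for_bucket_split) {
+//     return absl_ports::FailedPreconditionError(
+//         "Mismatch num_data_threshold_for_bucket_split");
+//   }
+//
+// The threshold determines the on-disk bucket layout, so reusing existing
+// files with a different threshold is rejected rather than silently
+// reinterpreted.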
+
+TEST_P(IntegerIndexTest, WildcardStoragePersistenceQuery) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+    // Index the same numeric content for TypeB.anotherProperty
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+
+  // Only the hits for 'desiredProperty' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
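+
+// The wildcard behavior verified above follows from a capped per-property
+// layout: the first kMaxPropertyStorages (32) distinct property paths each
+// get an individual storage, and every later path shares a single "wildcard"
+// storage. A rough sketch of the assignment (assumed shape with hypothetical
+// member names, not the actual implementation):
+//
+//   IntegerIndexStorage* storage;
+//   auto iter = property_to_storage_map.find(property_path);
+//   if (iter != property_to_storage_map.end()) {
+//     storage = iter->second.get();
+//   } else if (property_to_storage_map.size() < kMaxPropertyStorages) {
+//     storage = CreateNewStorage(property_path);  // individual storage
+//   } else {
+//     storage = wildcard_storage.get();  // shared fallback
+//   }
+//
+// This is why num_property_indices() is 33 above: 32 individual storages
+// plus the single wildcard storage.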
+
+TEST_P(IntegerIndexTest,
+ IntegerIndexShouldWorkAfterOptimizeAndReinitialization) {
+ constexpr std::string_view kPropertyPath1 = "prop1";
+ constexpr SectionId kSectionId1 = 0;
+ constexpr std::string_view kPropertyPath2 = "prop2";
+ constexpr SectionId kSectionId2 = 1;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Doc id = 1: insert 2 data for "prop1", "prop2"
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/1, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 2: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/2, kSectionId1,
+ /*keys=*/{3});
+
+    // Doc id = 3: insert 1 data for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/3, kSectionId2,
+ /*keys=*/{4});
+
+    // Doc id = 5: insert 2 data for "prop1", "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/5, kSectionId2,
+ /*keys=*/{1});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/5, kSectionId1,
+ /*keys=*/{2});
+
+ // Doc id = 8: insert 1 data for "prop2".
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/8, kSectionId2,
+ /*keys=*/{3});
+
+ // Doc id = 13: insert 1 data for "prop1".
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/13, kSectionId1,
+ /*keys=*/{4});
+
+ // Delete doc id = 3, 5, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(14, kInvalidDocumentId);
+ document_id_old_to_new[1] = 0;
+ document_id_old_to_new[2] = 1;
+ document_id_old_to_new[8] = 2;
+ document_id_old_to_new[13] = 3;
+
+ DocumentId new_last_added_document_id = 3;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+ }
+
+ {
+ // Reinitialize IntegerIndex and verify index and query API still work
+ // normally.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Key = 1
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/1,
+ /*key_upper=*/1),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 2
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/0, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/2,
+ /*key_upper=*/2),
+ IsOkAndHolds(IsEmpty()));
+
+ // key = 3
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/1, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/3,
+ /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/2, std::vector<SectionId>{kSectionId2}))));
+
+ // key = 4
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/3, std::vector<SectionId>{kSectionId1}))));
+ EXPECT_THAT(Query(integer_index.get(), kPropertyPath2, /*key_lower=*/4,
+ /*key_upper=*/4),
+ IsOkAndHolds(IsEmpty()));
+
+ // Index new data.
+ Index(integer_index.get(), kPropertyPath2, /*document_id=*/100, kSectionId2,
+ /*keys=*/{123});
+ Index(integer_index.get(), kPropertyPath1, /*document_id=*/100, kSectionId1,
+ /*keys=*/{456});
+ EXPECT_THAT(
+ Query(integer_index.get(), kPropertyPath2, /*key_lower=*/123,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId2}))));
+ EXPECT_THAT(
+ Query(integer_index.get(), kPropertyPath1, /*key_lower=*/123,
+ /*key_upper=*/456),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/100, std::vector<SectionId>{kSectionId1}))));
+ }
+}
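+
+// The document_id_old_to_new vector is the whole Optimize contract: index i
+// holds the new DocumentId for old DocumentId i, or kInvalidDocumentId if
+// that document was deleted. A minimal sketch of translating one hit
+// (TranslateDocId is a hypothetical helper; the real Optimize rewrites whole
+// posting lists):
+//
+//   libtextclassifier3::StatusOr<DocumentId> TranslateDocId(
+//       const std::vector<DocumentId>& document_id_old_to_new,
+//       DocumentId old_document_id) {
+//     if (static_cast<size_t>(old_document_id) >=
+//             document_id_old_to_new.size() ||
+//         document_id_old_to_new[old_document_id] == kInvalidDocumentId) {
+//       return absl_ports::NotFoundError("Hit belongs to a deleted doc");
+//     }
+//     return document_id_old_to_new[old_document_id];
+//   }
+//
+// Hits whose translation fails are dropped, which is why doc ids 3 and 5
+// disappear above while the remaining hits shift to their new ids.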
+
+TEST_P(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+ // Create a schema with two types:
+ // - TypeA has 34 properties:
+ // 'desiredProperty', 'otherProperty'*, 'undesiredProperty'
+ // - TypeB has 2 properties: 'anotherProperty', 'desiredProperty'
+ // 1. The 32 'otherProperty's will consume all of the individual storages
+ // 2. TypeA.desiredProperty and TypeB.anotherProperty will both be assigned
+ // SectionId = 0 for their respective types.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("anotherProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Ids are assigned alphabetically, so the property ids are:
+ // TypeA.desiredProperty = 0
+ // TypeA.otherPropertyN = N
+ // TypeA.undesiredProperty = 33
+ // TypeB.anotherProperty = 0
+ // TypeB.desiredProperty = 1
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typeb_another_prop_id = 0;
+ SectionId typeb_desired_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+
+  // Only the hits for 'desiredProperty' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+ std::vector<SectionId> expected_sections_typeb = {typeb_desired_prop_id};
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ // Put 10 docs of "TypeB" into the document store.
+ doc = DocumentBuilder(doc).SetUri("uri11").SetSchema("TypeB").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri14").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri15").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri16").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri17").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri18").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri19").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri20").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+    // Index the same numeric content for TypeB.anotherProperty
+ Index(integer_index.get(), another_property, /*document_id=*/11,
+ typeb_another_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), another_property, /*document_id=*/12,
+ typeb_another_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), another_property, /*document_id=*/13,
+ typeb_another_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), another_property, /*document_id=*/14,
+ typeb_another_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), another_property, /*document_id=*/15,
+ typeb_another_prop_id, /*keys=*/{2});
+
+ // Finally, index the same numeric content for TypeB.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/16,
+ typeb_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/17,
+ typeb_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/18,
+ typeb_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/19,
+ typeb_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/20,
+ typeb_desired_prop_id, /*keys=*/{2});
+
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/3,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/5,
+ clock_.GetSystemTimeMilliseconds()));
+ // Delete doc id = 3, 5, compress and keep the rest.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentId> document_id_old_to_new,
+ CompactDocStore());
+
+ DocumentId new_last_added_document_id = 18;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/20 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/17 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/16 - 2, expected_sections_typeb),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea))));
+}
+
+// This test covers the situation where Optimize causes us to throw out some of
+// the individual index storages (because they don't have any hits anymore).
+// In this case, any properties that added content to the wildcard storage (even
+// if all of their content was also deleted) should still be placed in the
+// wildcard storage.
+TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
+ // This test sets its schema assuming that max property storages == 32.
+ ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
+
+ PropertyConfigProto int_property_config =
+ PropertyConfigBuilder()
+ .SetName("otherProperty1")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build();
+  // Create a schema with a single type:
+  // - TypeA has 34 properties:
+  //   'desiredProperty', 'otherProperty1' ... 'otherProperty32',
+  //   'undesiredProperty'
+  // The 32 'otherProperty's will consume all of the individual storages, so
+  // 'desiredProperty' and 'undesiredProperty' will land in the wildcard
+  // storage.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(int_property_config)
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty2"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty3"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty4"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty5"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty6"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty7"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty8"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty9"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty10"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty11"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty12"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty13"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty14"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty15"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty16"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty17"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty18"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty19"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty20"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty21"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty22"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty23"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty24"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty25"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty26"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty27"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty28"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty29"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty30"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty31"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("otherProperty32"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("desiredProperty"))
+ .AddProperty(PropertyConfigBuilder(int_property_config)
+ .SetName("undesiredProperty")))
+ .Build();
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+  // Ids are assigned alphabetically, so the property ids are:
+  // TypeA.desiredProperty = 0
+  // TypeA.otherPropertyN = N
+  // TypeA.undesiredProperty = 33
+ SectionId typea_desired_prop_id = 0;
+ SectionId typea_undesired_prop_id = 33;
+ SectionId typea_other1_prop_id = 1;
+ std::string desired_property = "desiredProperty";
+ std::string undesired_property = "undesiredProperty";
+ std::string another_property = "anotherProperty";
+ std::string other_property_1 = "otherProperty1";
+
+  // Only the hits for 'desiredProperty' should be returned.
+ std::vector<SectionId> expected_sections_typea = {typea_desired_prop_id};
+
+ // Put 11 docs of "TypeA" into the document store.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns1", "uri0").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri8").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri9").Build()));
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri10").Build()));
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ // Index numeric content for other properties to force our property into the
+ // wildcard storage.
+ std::string other_property_path = "otherProperty";
+ for (int i = 1; i <= IntegerIndex::kMaxPropertyStorages; ++i) {
+ Index(integer_index.get(),
+ absl_ports::StrCat(other_property_path, std::to_string(i)),
+ /*document_id=*/0, /*section_id=*/i, /*keys=*/{i});
+ }
+
+ // Index numeric content for TypeA.desiredProperty
+ Index(integer_index.get(), desired_property, /*document_id=*/0,
+ typea_desired_prop_id, /*keys=*/{1});
+ Index(integer_index.get(), desired_property, /*document_id=*/1,
+ typea_desired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), desired_property, /*document_id=*/2,
+ typea_desired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), desired_property, /*document_id=*/3,
+ typea_desired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), desired_property, /*document_id=*/4,
+ typea_desired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{2});
+
+ // Index the same numeric content for TypeA.undesiredProperty
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ Index(integer_index.get(), undesired_property, /*document_id=*/7,
+ typea_undesired_prop_id, /*keys=*/{2});
+ Index(integer_index.get(), undesired_property, /*document_id=*/8,
+ typea_undesired_prop_id, /*keys=*/{0});
+ Index(integer_index.get(), undesired_property, /*document_id=*/9,
+ typea_undesired_prop_id, /*keys=*/{4});
+ Index(integer_index.get(), undesired_property, /*document_id=*/10,
+ typea_undesired_prop_id, /*keys=*/{2});
+
+ // Delete all the docs that had hits in otherProperty* and
+ // undesiredProperty.
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/0,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/6,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/7,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/8,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/9,
+ clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK(doc_store_->Delete(/*document_id=*/10,
+ clock_.GetSystemTimeMilliseconds()));
+ // Delete doc id = 0, 6, 7, 8, 9, 10. Compress and keep the rest.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentId> document_id_old_to_new,
+ CompactDocStore());
+
+ DocumentId new_last_added_document_id = 5 - 1;
+ EXPECT_THAT(integer_index->Optimize(document_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(integer_index->last_added_document_id(),
+ Eq(new_last_added_document_id));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/2, /*key_upper=*/2),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2 - 1, expected_sections_typea))));
+
+ EXPECT_THAT(
+ Query(integer_index.get(), desired_property,
+ /*key_lower=*/1, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/5 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/2 - 1, expected_sections_typea),
+ EqualsDocHitInfo(/*document_id=*/1 - 1, expected_sections_typea))));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ // Add a new doc (docid==5) and a hit for desiredProperty. This should still
+ // be placed into the wildcard integer storage.
+ doc = DocumentBuilder().SetKey("ns1", "uri11").SetSchema("TypeA").Build();
+ ICING_ASSERT_OK(this->doc_store_->Put(doc));
+ Index(integer_index.get(), desired_property, /*document_id=*/5,
+ typea_desired_prop_id, /*keys=*/{12});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ EXPECT_THAT(Query(integer_index.get(), desired_property,
+ /*key_lower=*/12, /*key_upper=*/12),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/5, expected_sections_typea))));
+
+ // Add a new doc (docid==6) and a hit for undesiredProperty. This should still
+ // be placed into the wildcard integer storage.
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri12").Build()));
+ Index(integer_index.get(), undesired_property, /*document_id=*/6,
+ typea_undesired_prop_id, /*keys=*/{3});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
+
+ expected_sections_typea = {typea_undesired_prop_id};
+ EXPECT_THAT(Query(integer_index.get(), undesired_property,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/6, expected_sections_typea))));
+
+ // Add a new doc (docid==7) and a hit for otherProperty1. This should be given
+ // its own individual storage.
+ ICING_ASSERT_OK(
+ this->doc_store_->Put(DocumentBuilder(doc).SetUri("uri13").Build()));
+ Index(integer_index.get(), other_property_1, /*document_id=*/7,
+ typea_other1_prop_id, /*keys=*/{3});
+ EXPECT_THAT(integer_index->num_property_indices(), Eq(2));
+
+ expected_sections_typea = {typea_other1_prop_id};
+ EXPECT_THAT(Query(integer_index.get(), other_property_1,
+ /*key_lower=*/3, /*key_upper=*/3),
+ IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
+ /*document_id=*/7, expected_sections_typea))));
+}
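+
+// Note on the behavior above: even after Optimize throws out every
+// individual storage, a new hit for 'undesiredProperty' stays in the
+// wildcard storage while 'otherProperty1' gets a fresh individual storage.
+// This implies the index persists the set of property paths that were ever
+// routed to the wildcard storage, roughly (assumed shape):
+//
+//   if (wildcard_properties_set.contains(property_path)) {
+//     return wildcard_storage.get();  // once wildcard, always wildcard
+//   }
+//
+// Without that memory, a reopened index could route old wildcard hits and
+// new hits for the same property to two different storages.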
+
+TEST_P(IntegerIndexTest, IteratorCallStats) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+
+  // GetIterator for range [INT64_MIN, INT64_MAX] and Advance through all
+  // hits. These 4 keys are in a single bucket, so there will be only 1
+  // posting list (and 1 block).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(
+ kDefaultTestPropertyPath,
+ /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *doc_store_,
+ *schema_store_, clock_.GetSystemTimeMilliseconds()));
+
+  // 1 block should be read even without calling Advance(), since we read the
+  // posting list and put the bucket into the priority queue in the
+  // constructor.
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/1,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 1st Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/2,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 2nd Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/3,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 3rd Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/4,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 4th Advance().
+ ICING_ASSERT_OK(iter->Advance());
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/4,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+
+ // 5th Advance().
+ ASSERT_THAT(iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/4,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/1));
+}
+
+TEST_P(IntegerIndexTest, IteratorCallStatsNonExistingProperty) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ kDefaultSectionId, /*keys=*/{1});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ kDefaultSectionId, /*keys=*/{3});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ kDefaultSectionId, /*keys=*/{2});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/3,
+ kDefaultSectionId, /*keys=*/{0});
+
+ // GetIterator for property "otherProperty1".
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> iter,
+ integer_index->GetIterator(
+ "otherProperty1", /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max(), *doc_store_,
+ *schema_store_, clock_.GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/0));
+
+ // 1st Advance().
+ ASSERT_THAT(iter->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(iter->GetCallStats(),
+ EqualsDocHitInfoIteratorCallStats(
+ /*num_leaf_advance_calls_lite_index=*/0,
+ /*num_leaf_advance_calls_main_index=*/0,
+ /*num_leaf_advance_calls_integer_index=*/0,
+ /*num_leaf_advance_calls_no_index=*/0,
+ /*num_blocks_inspected=*/0));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IntegerIndexTest, IntegerIndexTest,
+ testing::Values(
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/341,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/341,
+ /*pre_mapping_fbv_in=*/true),
+
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/16384,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/32768,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/65536,
+ /*pre_mapping_fbv_in=*/false)));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
new file mode 100644
index 0000000..d094d3d
--- /dev/null
+++ b/icing/index/numeric/numeric-index.h
@@ -0,0 +1,204 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
+#define ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class NumericIndex : public PersistentStorage {
+ public:
+ using value_type = T;
+
+  // Editor class for batch adding new records into the numeric index for a
+  // given property, DocumentId and SectionId. The caller should use BufferKey
+  // to buffer a key (calling it several times for multiple keys) and finally
+  // call IndexAllBufferedKeys once to batch add all buffered keys (with
+  // DocumentId + SectionId info, i.e. BasicHit) into the numeric index.
+ //
+  // For example, suppose there are values = [5, 1, 10, -100] in DocumentId =
+  // 5, SectionId = 1 (property "timestamp"). Then the client should call
+  // BufferKey(5), BufferKey(1), BufferKey(10), BufferKey(-100) first, and
+  // finally call IndexAllBufferedKeys once to batch add these records into
+  // the numeric index.
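+  //
+  // A minimal sketch of that flow (illustrative only; `editor` would come
+  // from a concrete implementation's Edit()):
+  //   ICING_RETURN_IF_ERROR(editor->BufferKey(5));
+  //   ICING_RETURN_IF_ERROR(editor->BufferKey(1));
+  //   ICING_RETURN_IF_ERROR(editor->BufferKey(10));
+  //   ICING_RETURN_IF_ERROR(editor->BufferKey(-100));
+  //   ICING_RETURN_IF_ERROR(std::move(*editor).IndexAllBufferedKeys());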
+ class Editor {
+ public:
+ explicit Editor(std::string_view property_path, DocumentId document_id,
+ SectionId section_id)
+ : property_path_(property_path),
+ document_id_(document_id),
+ section_id_(section_id) {}
+
+ virtual ~Editor() = default;
+
+ // Buffers a new key.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status BufferKey(T key) = 0;
+
+ // Adds all buffered keys into numeric index.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0;
+
+ protected:
+ std::string property_path_;
+ DocumentId document_id_;
+ SectionId section_id_;
+ };
+
+ // Iterator class for numeric index range query [key_lower, key_upper]
+  // (inclusive on both sides) on a given property (see GetIterator). There are
+ // some basic requirements for implementation:
+ // - Iterates through all relevant doc hits.
+  // - Merges multiple SectionIds of doc hits with the same DocumentId into a
+  //   single SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // For example, relevant doc hits (DocumentId, SectionId) are [(2, 0), (4, 3),
+ // (2, 1), (6, 2), (4, 2)]. Advance() and GetDocHitInfo() should return
+ // DocHitInfo(6, SectionIdMask(2)), DocHitInfo(4, SectionIdMask(2, 3)) and
+ // DocHitInfo(2, SectionIdMask(0, 1)).
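+  //
+  // A typical consumption loop (sketch; a non-OK status from Advance(), e.g.
+  // RESOURCE_EXHAUSTED, ends the iteration):
+  //   while (iterator->Advance().ok()) {
+  //     DocHitInfo doc_hit_info = iterator->GetDocHitInfo();
+  //     // ... use doc_hit_info ...
+  //   }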
+ class Iterator {
+ public:
+ explicit Iterator(T key_lower, T key_upper)
+ : key_lower_(key_lower), key_upper_(key_upper) {}
+
+ virtual ~Iterator() = default;
+
+ virtual libtextclassifier3::Status Advance() = 0;
+
+ virtual DocHitInfo GetDocHitInfo() const = 0;
+
+ virtual int32_t GetNumAdvanceCalls() const = 0;
+
+ virtual int32_t GetNumBlocksInspected() const = 0;
+
+ protected:
+ T key_lower_;
+ T key_upper_;
+ };
+
+ virtual ~NumericIndex() = default;
+
+ // Returns an Editor instance for adding new records into numeric index for a
+ // given property, DocumentId and SectionId. See Editor for more details.
+ virtual std::unique_ptr<Editor> Edit(std::string_view property_path,
+ DocumentId document_id,
+ SectionId section_id) = 0;
+
+  // Returns a DocHitInfoIteratorNumeric (as the DocHitInfoIterator interface
+  // type) for iterating through all docs which have the specified (numeric)
+  // property contents in range [key_lower, key_upper].
+ //
+ // In general, different numeric index implementations require different data
+ // iterator implementations, so class Iterator is an abstraction of the data
+ // iterator and DocHitInfoIteratorNumeric can work with any implementation of
+ // it. See Iterator and DocHitInfoIteratorNumeric for more details.
+ //
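+  // A query sketch (illustrative only; `index` is a concrete implementation):
+  //   ICING_ASSIGN_OR_RETURN(
+  //       std::unique_ptr<DocHitInfoIterator> iter,
+  //       index->GetIterator("timestamp", /*key_lower=*/0, /*key_upper=*/100,
+  //                          doc_store, schema_store, current_time_ms));
+  //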
+ // Returns:
+ // - std::unique_ptr<DocHitInfoIterator> on success
+ // - NOT_FOUND_ERROR if there is no numeric index for property_path
+ // - INVALID_ARGUMENT_ERROR if key_lower > key_upper
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ GetIterator(std::string_view property_path, T key_lower, T key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store,
+ int64_t current_time_ms) const = 0;
+
+  // Reduces internal file sizes by reclaiming the space and ids of deleted
+  // documents. The numeric index will convert all data (hits) to the new
+  // document ids and regenerate all index files. If all data in a property
+  // path are completely deleted, then the underlying storage must be
+  // discarded as well.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the numeric index.
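+  //
+  //   For example (illustrative), document_id_old_to_new = {0,
+  //   kInvalidDocumentId, 1} would map old DocumentId 0 to new DocumentId 0,
+  //   drop the deleted DocumentId 1, and map old DocumentId 2 to new
+  //   DocumentId 1.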
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) = 0;
+
+  // Clears all data in the numeric index and sets last_added_document_id to
+  // kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on the actual implementation
+ virtual libtextclassifier3::Status Clear() = 0;
+
+ // Returns the largest document_id added to the index. Note that DocumentIds
+ // are always inserted in increasing order.
+ virtual DocumentId last_added_document_id() const = 0;
+
+ // Sets last_added_document_id to document_id so long as document_id >
+ // last_added_document_id() or last_added_document_id() is invalid.
+ virtual void set_last_added_document_id(DocumentId document_id) = 0;
+
+ // The number of individual indices that the NumericIndex has created to
+ // search over all indexed properties thus far.
+ virtual int num_property_indices() const = 0;
+
+ protected:
+ explicit NumericIndex(const Filesystem& filesystem,
+ std::string&& working_path,
+ PersistentStorage::WorkingPathType working_path_type)
+ : PersistentStorage(filesystem, std::move(working_path),
+ working_path_type) {}
+
+ virtual libtextclassifier3::Status PersistStoragesToDisk(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::Status PersistMetadataToDisk(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override = 0;
+
+ virtual Crcs& crcs() override = 0;
+ virtual const Crcs& crcs() const override = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.cc b/icing/index/numeric/posting-list-integer-index-accessor.cc
new file mode 100644
index 0000000..af2aea4
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-accessor.cc
@@ -0,0 +1,164 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+PostingListIntegerIndexAccessor::Create(
+ FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer) {
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ storage->block_size(), serializer->GetDataTypeBytes());
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
+ return std::unique_ptr<PostingListIntegerIndexAccessor>(
+ new PostingListIntegerIndexAccessor(
+ storage, std::move(in_memory_posting_list), serializer));
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+PostingListIntegerIndexAccessor::CreateFromExisting(
+ FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer,
+ PostingListIdentifier existing_posting_list_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ Create(storage, serializer));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage->GetPostingList(existing_posting_list_id));
+ pl_accessor->preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ return pl_accessor;
+}
+
+// Returns the next batch of integer index data for the provided posting list.
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetNextDataBatch() {
+ return GetNextDataBatchImpl(/*free_posting_list=*/false);
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetAllDataAndFree() {
+ if (preexisting_posting_list_ == nullptr) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
+ "was not created from a preexisting posting list.");
+ }
+
+ std::vector<IntegerIndexData> all_data;
+ while (true) {
+ ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch,
+ GetNextDataBatchImpl(/*free_posting_list=*/true));
+ if (batch.empty()) {
+ break;
+ }
+ std::move(batch.begin(), batch.end(), std::back_inserter(all_data));
+ }
+
+ return all_data;
+}
+
+libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
+ const IntegerIndexData& data) {
+ PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
+ ? preexisting_posting_list_->posting_list
+ : in_memory_posting_list_;
+ libtextclassifier3::Status status =
+ serializer_->PrependData(&active_pl, data);
+ if (!absl_ports::IsResourceExhausted(status)) {
+ return status;
+ }
+ // There is no more room to add data to this current posting list! Therefore,
+ // we need to either move those data to a larger posting list or flush this
+ // posting list and create another max-sized posting list in the chain.
+ if (preexisting_posting_list_ != nullptr) {
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+ } else {
+ ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+ }
+
+ // Re-add data. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependData(&in_memory_posting_list_, data);
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexAccessor::GetNextDataBatchImpl(bool free_posting_list) {
+ if (preexisting_posting_list_ == nullptr) {
+ if (has_reached_posting_list_chain_end_) {
+ return std::vector<IntegerIndexData>();
+ }
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
+ "was not created from a preexisting posting list.");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<IntegerIndexData> batch,
+ serializer_->GetData(&preexisting_posting_list_->posting_list));
+ uint32_t next_block_index = kInvalidBlockIndex;
+ // Posting lists will only be chained when they are max-sized, in which case
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
+ }
+
+ if (free_posting_list) {
+ ICING_RETURN_IF_ERROR(
+ storage_->FreePostingList(std::move(*preexisting_posting_list_)));
+ }
+
+ if (next_block_index != kInvalidBlockIndex) {
+    // Since we only have to deal with the next block of a max-sized posting
+    // list, max_num_posting_lists is 1 and posting_list_index_bits is
+    // BitsToStore(1).
+ PostingListIdentifier next_posting_list_id(
+ next_block_index, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/BitsToStore(1));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage_->GetPostingList(next_posting_list_id));
+ preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ } else {
+ has_reached_posting_list_chain_end_ = true;
+ preexisting_posting_list_.reset();
+ }
+ return batch;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.h b/icing/index/numeric/posting-list-integer-index-accessor.h
new file mode 100644
index 0000000..4f667a0
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-accessor.h
@@ -0,0 +1,130 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
+#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+
+namespace icing {
+namespace lib {
+
+// TODO(b/259743562): Refactor PostingListAccessor derived classes
+
+// This class is used to provide a simple abstraction for adding integer index
+// data to posting lists. PostingListIntegerIndexAccessor handles:
+// 1) selection of properly-sized posting lists for the accumulated integer
+// index data during Finalize()
+// 2) chaining of max-sized posting lists.
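+//
+// A typical write path (sketch, mirroring the accompanying tests; `storage`,
+// `serializer`, and `data` are assumed to exist):
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+//       PostingListIntegerIndexAccessor::Create(storage, serializer));
+//   ICING_RETURN_IF_ERROR(pl_accessor->PrependData(data));
+//   PostingListAccessor::FinalizeResult result =
+//       std::move(*pl_accessor).Finalize();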
+class PostingListIntegerIndexAccessor : public PostingListAccessor {
+ public:
+ // Creates an empty PostingListIntegerIndexAccessor.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListIntegerIndexAccessor
+ // - INVALID_ARGUMENT error if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+ Create(FlashIndexStorage* storage,
+ PostingListIntegerIndexSerializer* serializer);
+
+ // Creates a PostingListIntegerIndexAccessor with an existing posting list
+ // identified by existing_posting_list_id.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListIntegerIndexAccessor
+ // - INVALID_ARGUMENT if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+ CreateFromExisting(FlashIndexStorage* storage,
+ PostingListIntegerIndexSerializer* serializer,
+ PostingListIdentifier existing_posting_list_id);
+
+ PostingListSerializer* GetSerializer() override { return serializer_; }
+
+ // Retrieves the next batch of data in the posting list chain.
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetNextDataBatch();
+
+ // Retrieves all data from the posting list chain and frees all posting
+ // list(s).
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetAllDataAndFree();
+
+  // Prepends one data. This may result in flushing the posting list to disk
+  // (if the PostingListIntegerIndexAccessor holds a max-sized posting list
+  // that is full) or freeing a pre-existing posting list if it is too small
+  // to fit all necessary data.
+ //
+ // RETURNS:
+ // - OK, on success
+ // - INVALID_ARGUMENT if !data.is_valid() or if data is greater than the
+ // previously added data.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status PrependData(const IntegerIndexData& data);
+
+ private:
+ explicit PostingListIntegerIndexAccessor(
+ FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
+ PostingListIntegerIndexSerializer* serializer)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
+ serializer_(serializer) {}
+
+ // Retrieves the next batch of data in the posting list chain.
+ //
+  // - free_posting_list: a boolean flag indicating whether to free all posting
+  //   lists after retrieving the batch data.
+ //
+ // RETURNS:
+ // - On success, a vector of integer index data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+ GetNextDataBatchImpl(bool free_posting_list);
+
+ PostingListIntegerIndexSerializer* serializer_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
diff --git a/icing/index/numeric/posting-list-integer-index-accessor_test.cc b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
new file mode 100644
index 0000000..f655fea
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
@@ -0,0 +1,535 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+class PostingListIntegerIndexAccessorTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/test_dir";
+ file_name_ = test_dir_ + "/test_file.idx.index";
+
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+ serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+ }
+
+ void TearDown() override {
+ flash_index_storage_.reset();
+ serializer_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string file_name_;
+ std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+};
+
+std::vector<IntegerIndexData> CreateData(int num_data,
+ DocumentId start_document_id,
+ int64_t start_key) {
+ SectionId section_id = kMaxSectionId;
+
+ std::vector<IntegerIndexData> data;
+ data.reserve(num_data);
+ for (int i = 0; i < num_data; ++i) {
+ data.push_back(IntegerIndexData(section_id, start_document_id, start_key));
+
+ if (section_id == kMinSectionId) {
+ section_id = kMaxSectionId;
+ } else {
+ --section_id;
+ }
+ ++start_document_id;
+ ++start_key;
+ }
+ return data;
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, DataAddAndRetrieveProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add some integer index data
+ std::vector<IntegerIndexData> data_vec =
+ CreateData(/*num_data=*/5, /*start_document_id=*/0, /*start_key=*/819);
+ for (const IntegerIndexData& data : data_vec) {
+ EXPECT_THAT(pl_accessor->PrependData(data), IsOk());
+ }
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result.status, IsOk());
+ EXPECT_THAT(result.id.block_index(), Eq(1));
+ EXPECT_THAT(result.id.posting_list_index(), Eq(0));
+
+ // Retrieve some data.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result.id));
+ EXPECT_THAT(
+ serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLKeepOnSameBlock) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add a single data. This will fit in a min-sized posting list.
+ IntegerIndexData data1(/*section_id=*/1, /*document_id=*/0, /*key=*/12345);
+ ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ // Should be allocated to the first block.
+ ASSERT_THAT(result1.id.block_index(), Eq(1));
+ ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Add one more data. The minimum size for a posting list must be able to fit
+ // two data, so this should NOT cause the previous pl to be reallocated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ IntegerIndexData data2(/*section_id=*/1, /*document_id=*/1, /*key=*/23456);
+ ICING_ASSERT_OK(pl_accessor->PrependData(data2));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ // Should be in the same posting list.
+ EXPECT_THAT(result2.id, Eq(result1.id));
+
+ // The posting list at result2.id should hold all of the data that have been
+ // added.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAre(data2, data1)));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLReallocateToLargerPL) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+  // Adding 3 data should cause Finalize to allocate a 48-byte posting list,
+  // which can store at most 4 data.
+ std::vector<IntegerIndexData> data_vec1 =
+ CreateData(/*num_data=*/3, /*start_document_id=*/0, /*start_key=*/819);
+ for (const IntegerIndexData& data : data_vec1) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ // Should be allocated to the first block.
+ ASSERT_THAT(result1.id.block_index(), Eq(1));
+ ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Now add more data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+  // The current posting list can fit 1 more data. Adding 12 more data should
+  // result in these data being moved to a larger posting list. Also the total
+  // size of these data won't exceed the max posting list size, so there will
+  // be only a single posting list and no chain.
+ std::vector<IntegerIndexData> data_vec2 = CreateData(
+ /*num_data=*/12,
+ /*start_document_id=*/data_vec1.back().basic_hit().document_id() + 1,
+ /*start_key=*/819);
+
+ for (const IntegerIndexData& data : data_vec2) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ // Should be allocated to the second (new) block because the posting list
+ // should grow beyond the size that the first block maintains.
+ EXPECT_THAT(result2.id.block_index(), Eq(2));
+ EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+
+ // The posting list at result2.id should hold all of the data that have been
+ // added.
+ std::vector<IntegerIndexData> all_data_vec;
+ all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+ all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+ all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(all_data_vec.rbegin(),
+ all_data_vec.rend())));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, MultiBlockChainsBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(IntegerIndexData)
+  // is 12, so a max-sized posting list can store (4096 - 12) / 12 = 340 data.
+  // Adding 341 data should cause:
+  // - 2 max-sized posting lists to be allocated, in block 1 and block 2.
+  // - Chaining: block 2 -> block 1.
+ std::vector<IntegerIndexData> data_vec =
+ CreateData(/*num_data=*/341, /*start_document_id=*/0, /*start_key=*/819);
+ for (const IntegerIndexData& data : data_vec) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ PostingListIdentifier second_block_id = result1.id;
+ // Should be allocated to the second block.
+ EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // We should be able to retrieve all data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_block_id));
+ // This pl_holder will only hold a posting list with the data that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<IntegerIndexData> second_block_data,
+ serializer_->GetData(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_data, SizeIs(Lt(data_vec.size())));
+ auto first_block_data_start = data_vec.rbegin() + second_block_data.size();
+ EXPECT_THAT(second_block_data,
+ ElementsAreArray(data_vec.rbegin(), first_block_data_start));
+
+ // Now retrieve all of the data that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(
+ serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_data_start, data_vec.rend())));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest,
+ PreexistingMultiBlockReusesBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(IntegerIndexData)
+  // is 12, so a max-sized posting list can store (4096 - 12) / 12 = 340 data.
+  // Adding 341 data will cause:
+  // - 2 max-sized posting lists to be allocated, in block 1 and block 2.
+  // - Chaining: block 2 -> block 1.
+ std::vector<IntegerIndexData> data_vec1 =
+ CreateData(/*num_data=*/341, /*start_document_id=*/0, /*start_key=*/819);
+ for (const IntegerIndexData& data : data_vec1) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ PostingListIdentifier first_add_id = result1.id;
+ EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // Now add more data. These should fit on the existing second block and not
+ // fill it up.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), first_add_id));
+ std::vector<IntegerIndexData> data_vec2 = CreateData(
+ /*num_data=*/10,
+ /*start_document_id=*/data_vec1.back().basic_hit().document_id() + 1,
+ /*start_key=*/819);
+ for (const IntegerIndexData& data : data_vec2) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ PostingListIdentifier second_add_id = result2.id;
+ EXPECT_THAT(second_add_id, Eq(first_add_id));
+
+ // We should be able to retrieve all data.
+ std::vector<IntegerIndexData> all_data_vec;
+ all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+ all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+ all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_add_id));
+ // This pl_holder will only hold a posting list with the data that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<IntegerIndexData> second_block_data,
+ serializer_->GetData(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_data, SizeIs(Lt(all_data_vec.size())));
+ auto first_block_data_start =
+ all_data_vec.rbegin() + second_block_data.size();
+ EXPECT_THAT(second_block_data,
+ ElementsAreArray(all_data_vec.rbegin(), first_block_data_start));
+
+ // Now retrieve all of the data that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_data_start,
+ all_data_vec.rend())));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest,
+ InvalidDataShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ IntegerIndexData invalid_data;
+ EXPECT_THAT(pl_accessor->PrependData(invalid_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest,
+ BasicHitIncreasingShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/12345);
+ ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+
+ IntegerIndexData data2(/*section_id=*/6, /*document_id=*/1, /*key=*/12345);
+ EXPECT_THAT(pl_accessor->PrependData(data2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ IntegerIndexData data3(/*section_id=*/2, /*document_id=*/0, /*key=*/12345);
+ EXPECT_THAT(pl_accessor->PrependData(data3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest,
+ NewPostingListNoDataAddedShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result.status,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest,
+ PreexistingPostingListNoDataAddedShouldSucceed) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/12345);
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor2).Finalize();
+ EXPECT_THAT(result2.status, IsOk());
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, GetAllDataAndFree) {
+ IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/123);
+ IntegerIndexData data2(/*section_id=*/3, /*document_id=*/2, /*key=*/456);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add 2 data.
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data2));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ EXPECT_THAT(pl_accessor2->GetAllDataAndFree(),
+ IsOkAndHolds(ElementsAre(data2, data1)));
+
+ // Allocate a new posting list with same size again.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor3,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add 2 data.
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data1));
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data2));
+ PostingListAccessor::FinalizeResult result3 =
+ std::move(*pl_accessor3).Finalize();
+ ICING_ASSERT_OK(result3.status);
+ // We should get the same id if the previous one has been freed correctly by
+ // GetAllDataAndFree.
+ EXPECT_THAT(result3.id, Eq(result1.id));
+}
+
+TEST_F(PostingListIntegerIndexAccessorTest, GetAllDataAndFreePostingListChain) {
+ uint32_t block_size = FlashIndexStorage::SelectBlockSize();
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ block_size, serializer_->GetDataTypeBytes());
+ uint32_t max_num_data_single_posting_list =
+ max_posting_list_bytes / serializer_->GetDataTypeBytes();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+
+ // Prepend max_num_data_single_posting_list + 1 data.
+ std::vector<IntegerIndexData> data_vec;
+ for (uint32_t i = 0; i < max_num_data_single_posting_list + 1; ++i) {
+ IntegerIndexData data(/*section_id=*/3, static_cast<DocumentId>(i),
+ /*key=*/i);
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data));
+ data_vec.push_back(data);
+ }
+
+ // This will cause:
+ // - Allocate the first max-sized posting list at block index = 1, storing
+ // max_num_data_single_posting_list data.
+ // - Allocate the second max-sized posting list at block index = 2, storing 1
+ // data. Also its next_block_index is 1.
+  // - In other words, we will get the chain 2 -> 1, and result1.id points to
+  //   block 2.
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ uint32_t first_pl_block_index = kInvalidBlockIndex;
+ {
+ // result1.id points at the second (max-sized) PL, and next_block_index of
+ // the second PL points to the first PL's block. Fetch the first PL's block
+ // index manually.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result1.id));
+ first_pl_block_index = pl_holder.next_block_index;
+ }
+ ASSERT_THAT(first_pl_block_index, Ne(kInvalidBlockIndex));
+
+ // Call GetAllDataAndFree. This will free block 2 and block 1.
+ // Free block list: 1 -> 2 (since free block list is LIFO).
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ EXPECT_THAT(
+ pl_accessor2->GetAllDataAndFree(),
+ IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+ pl_accessor2.reset();
+
+ // Allocate a new posting list with same size again.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor3,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
+ // Add same set of data.
+ for (uint32_t i = 0; i < max_num_data_single_posting_list + 1; ++i) {
+ ICING_ASSERT_OK(pl_accessor3->PrependData(data_vec[i]));
+ }
+
+ // This will cause:
+ // - Allocate the first max-sized posting list from the free block list, which
+ // is block index = 1, storing max_num_data_single_posting_list data.
+  // - Allocate the second max-sized posting list from the next block in the
+  //   free block list, which is block index = 2, storing 1 data. Also its
+  //   next_block_index should be 1.
+ PostingListAccessor::FinalizeResult result3 =
+ std::move(*pl_accessor3).Finalize();
+ ICING_ASSERT_OK(result3.status);
+ // We should get the same id if the previous one has been freed correctly by
+ // GetAllDataAndFree.
+ EXPECT_THAT(result3.id, Eq(result1.id));
+ // Also the first PL should be the same if it has been freed correctly by
+ // GetAllDataAndFree. Since it is a max-sized posting list, we just need to
+ // verify the block index.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result3.id));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(first_pl_block_index));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-serializer.cc b/icing/index/numeric/posting-list-integer-index-serializer.cc
new file mode 100644
index 0000000..99f14f9
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-serializer.cc
@@ -0,0 +1,512 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+
+#include <cstdint>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+uint32_t PostingListIntegerIndexSerializer::GetBytesUsed(
+ const PostingListUsed* posting_list_used) const {
+ // The special data will be included if they represent actual data. If they
+ // represent the data start offset or the invalid data sentinel, they are not
+ // included.
+ return posting_list_used->size_in_bytes() -
+ GetStartByteOffset(posting_list_used);
+}
+
+uint32_t PostingListIntegerIndexSerializer::GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
+ // If in either the FULL state or ALMOST_FULL state, this posting list *is*
+ // the minimum size posting list that can fit these data. So just return the
+ // size of the posting list.
+ return posting_list_used->size_in_bytes();
+ }
+
+ // In NOT_FULL state, BytesUsed contains no special data. The minimum sized
+ // posting list that would be guaranteed to fit these data would be
+ // ALMOST_FULL, with kInvalidData in special data 0, the uncompressed data in
+ // special data 1 and the n compressed data in the compressed region.
+ // BytesUsed contains one uncompressed data and n compressed data. Therefore,
+ // fitting these data into a posting list would require BytesUsed plus one
+ // extra data.
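+  //
+  // E.g. (hypothetical numbers, consistent with the tests where
+  // sizeof(IntegerIndexData) == 12): a NOT_FULL posting list with
+  // BytesUsed = 36 (3 data) would need a 36 + 12 = 48-byte posting list,
+  // which can hold at most 4 data.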
+ return GetBytesUsed(posting_list_used) + GetDataTypeBytes();
+}
+
+void PostingListIntegerIndexSerializer::Clear(
+ PostingListUsed* posting_list_used) const {
+ // Safe to ignore return value because posting_list_used->size_in_bytes() is
+ // a valid argument.
+ SetStartByteOffset(posting_list_used,
+ /*offset=*/posting_list_used->size_in_bytes());
+}
+
+libtextclassifier3::Status PostingListIntegerIndexSerializer::MoveFrom(
+ PostingListUsed* dst, PostingListUsed* src) const {
+ ICING_RETURN_ERROR_IF_NULL(dst);
+ ICING_RETURN_ERROR_IF_NULL(src);
+ if (GetMinPostingListSizeToFit(src) > dst->size_in_bytes()) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "src MinPostingListSizeToFit %d must not be larger than dst size %d.",
+        GetMinPostingListSizeToFit(src), dst->size_in_bytes()));
+ }
+
+ if (!IsPostingListValid(dst)) {
+ return absl_ports::FailedPreconditionError(
+ "Dst posting list is in an invalid state and can't be used!");
+ }
+ if (!IsPostingListValid(src)) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot MoveFrom an invalid src posting list!");
+ }
+
+ // Pop just enough data that all of src's compressed data fit in
+ // dst posting_list's compressed area. Then we can memcpy that area.
+ std::vector<IntegerIndexData> data_arr;
+ while (IsFull(src) || IsAlmostFull(src) ||
+ (dst->size_in_bytes() - kSpecialDataSize < GetBytesUsed(src))) {
+ if (!GetDataInternal(src, /*limit=*/1, /*pop=*/true, &data_arr).ok()) {
+ return absl_ports::AbortedError(
+ "Unable to retrieve data from src posting list.");
+ }
+ }
+
+ // memcpy the area and set up start byte offset.
+ Clear(dst);
+ memcpy(dst->posting_list_buffer() + dst->size_in_bytes() - GetBytesUsed(src),
+ src->posting_list_buffer() + GetStartByteOffset(src),
+ GetBytesUsed(src));
+  // Because we popped all data from src outside of the compressed area and
+  // guaranteed that GetBytesUsed(src) is less than dst->size_in_bytes() -
+  // kSpecialDataSize, this is guaranteed to be a valid byte offset for the
+  // NOT_FULL state, so ignoring the return value is safe.
+ SetStartByteOffset(dst, dst->size_in_bytes() - GetBytesUsed(src));
+
+ // Put back remaining data.
+ for (auto riter = data_arr.rbegin(); riter != data_arr.rend(); ++riter) {
+ // PrependData may return:
+ // - INVALID_ARGUMENT: if data is invalid or not less than the previous data
+ // - RESOURCE_EXHAUSTED
+ // RESOURCE_EXHAUSTED should be impossible because we've already assured
+ // that there is enough room above.
+ ICING_RETURN_IF_ERROR(PrependData(dst, *riter));
+ }
+
+ Clear(src);
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status
+PostingListIntegerIndexSerializer::PrependDataToAlmostFull(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
+ SpecialDataType special_data = GetSpecialData(posting_list_used, /*index=*/1);
+ if (special_data.data().basic_hit() < data.basic_hit()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "BasicHit %d being prepended must not be greater than the most recent"
+ "BasicHit %d",
+ data.basic_hit().value(), special_data.data().basic_hit().value()));
+ }
+
+ // TODO(b/259743562): [Optimization 2] compression
+  // Without compression, prepending a new data into an ALMOST_FULL posting
+  // list will change the posting list to FULL state. Therefore, set special
+  // data 0 directly.
+ SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(data));
+ return libtextclassifier3::Status::OK;
+}
+
+void PostingListIntegerIndexSerializer::PrependDataToEmpty(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
+ // First data to be added. Just add verbatim, no compression.
+ if (posting_list_used->size_in_bytes() == kSpecialDataSize) {
+ // First data will be stored at special data 1.
+ // Safe to ignore the return value because 1 < kNumSpecialData
+ SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+ // Safe to ignore the return value because sizeof(IntegerIndexData) is a
+ // valid argument.
+ SetStartByteOffset(posting_list_used,
+ /*offset=*/sizeof(IntegerIndexData));
+ } else {
+    // Since this is the first data, size != kSpecialDataSize, and
+    // size % sizeof(IntegerIndexData) == 0, we know that there is room to fit
+    // 'data' into the compressed region, so ValueOrDie is safe.
+ uint32_t offset =
+ PrependDataUncompressed(posting_list_used, data,
+ /*offset=*/posting_list_used->size_in_bytes())
+ .ValueOrDie();
+ // Safe to ignore the return value because PrependDataUncompressed is
+ // guaranteed to return a valid offset.
+ SetStartByteOffset(posting_list_used, offset);
+ }
+}
+
+libtextclassifier3::Status
+PostingListIntegerIndexSerializer::PrependDataToNotFull(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data,
+ uint32_t offset) const {
+ IntegerIndexData cur;
+ memcpy(&cur, posting_list_used->posting_list_buffer() + offset,
+ sizeof(IntegerIndexData));
+ if (cur.basic_hit() < data.basic_hit()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "BasicHit %d being prepended must not be greater than the most recent"
+ "BasicHit %d",
+ data.basic_hit().value(), cur.basic_hit().value()));
+ }
+
+ // TODO(b/259743562): [Optimization 2] compression
+ if (offset >= kSpecialDataSize + sizeof(IntegerIndexData)) {
+ offset =
+ PrependDataUncompressed(posting_list_used, data, offset).ValueOrDie();
+ SetStartByteOffset(posting_list_used, offset);
+ } else {
+ // The new data must be put in special data 1.
+ SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+ // State ALMOST_FULL. Safe to ignore the return value because
+ // sizeof(IntegerIndexData) is a valid argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(IntegerIndexData));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PostingListIntegerIndexSerializer::PrependData(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
+ static_assert(
+ sizeof(BasicHit::Value) <= sizeof(uint64_t),
+ "BasicHit::Value cannot be larger than 8 bytes because the delta "
+ "must be able to fit in 8 bytes.");
+
+ if (!data.is_valid()) {
+ return absl_ports::InvalidArgumentError("Cannot prepend an invalid data!");
+ }
+ if (!IsPostingListValid(posting_list_used)) {
+ return absl_ports::FailedPreconditionError(
+ "This PostingListUsed is in an invalid state and can't add any data!");
+ }
+
+ if (IsFull(posting_list_used)) {
+ // State FULL: no space left.
+ return absl_ports::ResourceExhaustedError("No more room for data");
+ } else if (IsAlmostFull(posting_list_used)) {
+ return PrependDataToAlmostFull(posting_list_used, data);
+ } else if (IsEmpty(posting_list_used)) {
+ PrependDataToEmpty(posting_list_used, data);
+ return libtextclassifier3::Status::OK;
+ } else {
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ return PrependDataToNotFull(posting_list_used, data, offset);
+ }
+}
+
+libtextclassifier3::StatusOr<uint32_t>
+PostingListIntegerIndexSerializer::PrependDataArray(
+ PostingListUsed* posting_list_used, const IntegerIndexData* array,
+ uint32_t num_data, bool keep_prepended) const {
+ if (!IsPostingListValid(posting_list_used)) {
+ return 0;
+ }
+
+ uint32_t i;
+ for (i = 0; i < num_data; ++i) {
+ if (!PrependData(posting_list_used, array[i]).ok()) {
+ break;
+ }
+ }
+ if (i != num_data && !keep_prepended) {
+ // Didn't fit. Undo everything and check that we have the same offset as
+ // before. PopFrontData guarantees that it will remove all 'i' data so long
+ // as there are at least 'i' data in the posting list, which we know there
+ // are.
+ ICING_RETURN_IF_ERROR(PopFrontData(posting_list_used, /*num_data=*/i));
+ return 0;
+ }
+ return i;
+}
+
+libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
+PostingListIntegerIndexSerializer::GetData(
+ const PostingListUsed* posting_list_used) const {
+ std::vector<IntegerIndexData> data_arr_out;
+ ICING_RETURN_IF_ERROR(GetData(posting_list_used, &data_arr_out));
+ return data_arr_out;
+}
+
+libtextclassifier3::Status PostingListIntegerIndexSerializer::GetData(
+ const PostingListUsed* posting_list_used,
+ std::vector<IntegerIndexData>* data_arr_out) const {
+ return GetDataInternal(posting_list_used,
+ /*limit=*/std::numeric_limits<uint32_t>::max(),
+ /*pop=*/false, data_arr_out);
+}
+
+libtextclassifier3::Status PostingListIntegerIndexSerializer::PopFrontData(
+ PostingListUsed* posting_list_used, uint32_t num_data) const {
+ if (num_data == 1 && IsFull(posting_list_used)) {
+    // The PL is in FULL state, which means that we save 2 uncompressed data
+    // in the 2 special positions. But FULL state may be reached from 2
+    // different states.
+ // (1) In ALMOST_FULL state
+ // +------------------+-----------------+-----+---------------------------+
+ // |Data::Invalid |1st data |(pad)|(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+-----+---------------------------+
+ // When we prepend another data, we can only put it at special data 0, and
+ // thus get a FULL PL
+ // +------------------+-----------------+-----+---------------------------+
+ // |new 1st data |original 1st data|(pad)|(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+-----+---------------------------+
+ //
+ // (2) In NOT_FULL state
+ // +------------------+-----------------+-------+---------+---------------+
+ // |data-start-offset |Data::Invalid |(pad) |1st data |(compressed) |
+ // | | | | |data |
+ // +------------------+-----------------+-------+---------+---------------+
+ // When we prepend another data, we can reach any of the 3 following
+ // scenarios:
+ // (2.1) NOT_FULL
+ // if the space of pad and original 1st data can accommodate the new 1st
+ // data and the encoded delta value.
+ // +------------------+-----------------+-----+--------+------------------+
+ // |data-start-offset |Data::Invalid |(pad)|new |(compressed) data |
+ // | | | |1st data| |
+ // +------------------+-----------------+-----+--------+------------------+
+ // (2.2) ALMOST_FULL
+ // If the space of pad and original 1st data cannot accommodate the new 1st
+ // data and the encoded delta value but can accommodate the encoded delta
+ // value only. We can put the new 1st data at special position 1.
+ // +------------------+-----------------+---------+-----------------------+
+ // |Data::Invalid |new 1st data |(pad) |(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+---------+-----------------------+
+ // (2.3) FULL
+ // In very rare case, it cannot even accommodate only the encoded delta
+ // value. we can move the original 1st data into special position 1 and the
+ // new 1st data into special position 0. This may happen because we use
+ // VarInt encoding method which may make the encoded value longer (about
+ // 4/3 times of original)
+ // +------------------+-----------------+--------------+------------------+
+ // |new 1st data |original 1st data|(pad) |(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+--------------+------------------+
+ //
+    // Suppose now the PL is in FULL state, but we don't know whether it
+    // arrived at this state from NOT_FULL (like (2.3)) or from ALMOST_FULL
+    // (like (1)). We'll return to ALMOST_FULL state like (1) if we simply pop
+    // the new 1st data, but we want to make the prepending operation
+    // "reversible". So there should be some way to return to NOT_FULL if
+    // possible. A simple way to do this is:
+ // - Pop 2 data out of the PL to state ALMOST_FULL or NOT_FULL.
+ // - Add the second data ("original 1st data") back.
+ //
+ // Then we can return to the correct original states of (2.1) or (1). This
+ // makes our prepending operation reversible.
+ std::vector<IntegerIndexData> out;
+
+ // Popping 2 data should never fail because we've just ensured that the
+ // posting list is in the FULL state.
+ ICING_RETURN_IF_ERROR(
+ GetDataInternal(posting_list_used, /*limit=*/2, /*pop=*/true, &out));
+
+    // PrependData should never fail because:
+    // - out[1] is a valid data less than all data remaining in the posting
+    //   list.
+    // - There's no way that the posting list could run out of room, because
+    //   it previously stored these 2 data.
+ ICING_RETURN_IF_ERROR(PrependData(posting_list_used, out[1]));
+ } else if (num_data > 0) {
+ return GetDataInternal(posting_list_used, /*limit=*/num_data, /*pop=*/true,
+ /*out=*/nullptr);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PostingListIntegerIndexSerializer::GetDataInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<IntegerIndexData>* out) const {
+ // TODO(b/259743562): [Optimization 2] handle compressed data
+
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ uint32_t count = 0;
+
+ // First traverse the first two special positions.
+ while (count < limit && offset < kSpecialDataSize) {
+ // offset / sizeof(IntegerIndexData) < kNumSpecialData because of the check
+ // above.
+ SpecialDataType special_data =
+ GetSpecialData(posting_list_used,
+ /*index=*/offset / sizeof(IntegerIndexData));
+ if (out != nullptr) {
+ out->push_back(special_data.data());
+ }
+ offset += sizeof(IntegerIndexData);
+ ++count;
+ }
+
+  // - We don't compress the data for now.
+  // - The posting list size is a multiple of the data type's size in bytes.
+  // So the offset of the first non-special data is guaranteed to be exactly
+  // kSpecialDataSize in the ALMOST_FULL or FULL state. In fact, we must not
+  // apply the padding-skipping logic here while we still store uncompressed
+  // data, because in that case 0 bytes are meaningful (e.g. inverted doc id
+  // byte = 0).
+  // TODO(b/259743562): [Optimization 2] handle the padding-skipping logic when
+  // applying data compression.
+
+ while (count < limit && offset < posting_list_used->size_in_bytes()) {
+ IntegerIndexData data;
+ memcpy(&data, posting_list_used->posting_list_buffer() + offset,
+ sizeof(IntegerIndexData));
+ offset += sizeof(IntegerIndexData);
+ if (out != nullptr) {
+ out->push_back(data);
+ }
+ ++count;
+ }
+
+ if (pop) {
+ PostingListUsed* mutable_posting_list_used =
+ const_cast<PostingListUsed*>(posting_list_used);
+ // Modify the posting list so that we pop all data actually traversed.
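+    // Zeroing the traversed region (from the end of the special data up to
+    // the new start offset) clears the popped bytes before the start offset
+    // is advanced past them.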
+ if (offset >= kSpecialDataSize &&
+ offset < posting_list_used->size_in_bytes()) {
+ memset(
+ mutable_posting_list_used->posting_list_buffer() + kSpecialDataSize,
+ 0, offset - kSpecialDataSize);
+ }
+ SetStartByteOffset(mutable_posting_list_used, offset);
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+PostingListIntegerIndexSerializer::SpecialDataType
+PostingListIntegerIndexSerializer::GetSpecialData(
+ const PostingListUsed* posting_list_used, uint32_t index) const {
+  // It is ok to temporarily construct a SpecialData with offset = 0, since
+  // we're going to overwrite it with memcpy.
+ SpecialDataType special_data(0);
+ memcpy(&special_data,
+ posting_list_used->posting_list_buffer() +
+ index * sizeof(SpecialDataType),
+ sizeof(SpecialDataType));
+ return special_data;
+}
+
+void PostingListIntegerIndexSerializer::SetSpecialData(
+ PostingListUsed* posting_list_used, uint32_t index,
+ const SpecialDataType& special_data) const {
+ memcpy(posting_list_used->posting_list_buffer() +
+ index * sizeof(SpecialDataType),
+ &special_data, sizeof(SpecialDataType));
+}
+
+bool PostingListIntegerIndexSerializer::IsPostingListValid(
+ const PostingListUsed* posting_list_used) const {
+ if (IsAlmostFull(posting_list_used)) {
+ // Special data 1 should hold a valid data.
+ if (!GetSpecialData(posting_list_used, /*index=*/1).data().is_valid()) {
+ ICING_LOG(ERROR)
+ << "Both special data cannot be invalid at the same time.";
+ return false;
+ }
+ } else if (!IsFull(posting_list_used)) {
+ // NOT_FULL. Special data 0 should hold a valid offset.
+ SpecialDataType special_data =
+ GetSpecialData(posting_list_used, /*index=*/0);
+ if (special_data.data_start_offset() > posting_list_used->size_in_bytes() ||
+ special_data.data_start_offset() < kSpecialDataSize) {
+ ICING_LOG(ERROR) << "Offset: " << special_data.data_start_offset()
+ << " size: " << posting_list_used->size_in_bytes()
+ << " sp size: " << kSpecialDataSize;
+ return false;
+ }
+ }
+ return true;
+}
+
+uint32_t PostingListIntegerIndexSerializer::GetStartByteOffset(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used)) {
+ return 0;
+ } else if (IsAlmostFull(posting_list_used)) {
+ return sizeof(IntegerIndexData);
+ } else {
+ return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset();
+ }
+}
+
+bool PostingListIntegerIndexSerializer::SetStartByteOffset(
+ PostingListUsed* posting_list_used, uint32_t offset) const {
+ if (offset > posting_list_used->size_in_bytes()) {
+ ICING_LOG(ERROR) << "offset cannot be a value greater than size "
+ << posting_list_used->size_in_bytes() << ". offset is "
+ << offset << ".";
+ return false;
+ }
+ if (offset < kSpecialDataSize && offset > sizeof(IntegerIndexData)) {
+ ICING_LOG(ERROR) << "offset cannot be a value between ("
+ << sizeof(IntegerIndexData) << ", " << kSpecialDataSize
+ << "). offset is " << offset << ".";
+ return false;
+ }
+ if (offset < sizeof(IntegerIndexData) && offset != 0) {
+ ICING_LOG(ERROR) << "offset cannot be a value between (0, "
+ << sizeof(IntegerIndexData) << "). offset is " << offset
+ << ".";
+ return false;
+ }
+
+ if (offset >= kSpecialDataSize) {
+ // NOT_FULL state.
+ SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(offset));
+ SetSpecialData(posting_list_used, /*index=*/1,
+ SpecialDataType(IntegerIndexData()));
+ } else if (offset == sizeof(IntegerIndexData)) {
+ // ALMOST_FULL state.
+ SetSpecialData(posting_list_used, /*index=*/0,
+ SpecialDataType(IntegerIndexData()));
+ }
+  // Nothing to do for the FULL state - the offset isn't actually stored
+  // anywhere and both special data hold valid data.
+ return true;
+}
+
+libtextclassifier3::StatusOr<uint32_t>
+PostingListIntegerIndexSerializer::PrependDataUncompressed(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data,
+ uint32_t offset) const {
+ if (offset < kSpecialDataSize + sizeof(IntegerIndexData)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Not enough room to prepend IntegerIndexData at offset %d.", offset));
+ }
+ offset -= sizeof(IntegerIndexData);
+ memcpy(posting_list_used->posting_list_buffer() + offset, &data,
+ sizeof(IntegerIndexData));
+ return offset;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-serializer.h b/icing/index/numeric/posting-list-integer-index-serializer.h
new file mode 100644
index 0000000..cbaed33
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-serializer.h
@@ -0,0 +1,338 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
+#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
+
+#include <cstdint>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/numeric/integer-index-data.h"
+
+namespace icing {
+namespace lib {
+
+// A serializer class to serialize IntegerIndexData to PostingListUsed.
+class PostingListIntegerIndexSerializer : public PostingListSerializer {
+ public:
+ using SpecialDataType = SpecialData<IntegerIndexData>;
+ static_assert(sizeof(SpecialDataType) == sizeof(IntegerIndexData), "");
+
+ static constexpr uint32_t kSpecialDataSize =
+ kNumSpecialData * sizeof(SpecialDataType);
+
+ uint32_t GetDataTypeBytes() const override {
+ return sizeof(IntegerIndexData);
+ }
+
+ uint32_t GetMinPostingListSize() const override {
+ static constexpr uint32_t kMinPostingListSize = kSpecialDataSize;
+ static_assert(sizeof(PostingListIndex) <= kMinPostingListSize,
+ "PostingListIndex must be small enough to fit in a "
+ "minimum-sized Posting List.");
+
+ return kMinPostingListSize;
+ }
+
+ uint32_t GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const override;
+
+ uint32_t GetBytesUsed(
+ const PostingListUsed* posting_list_used) const override;
+
+ void Clear(PostingListUsed* posting_list_used) const override;
+
+ libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
+ PostingListUsed* src) const override;
+
+ // Prepend an IntegerIndexData to the posting list.
+ //
+ // RETURNS:
+ // - INVALID_ARGUMENT if !data.is_valid() or if data is not less than the
+ // previously added data.
+ // - RESOURCE_EXHAUSTED if there is no more room to add data to the posting
+ // list.
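+  //
+  // Usage sketch (hypothetical values; assumes a pl_used created with
+  // PostingListUsed::CreateFromUnitializedRegion, as in the unit tests):
+  //   PostingListIntegerIndexSerializer serializer;
+  //   IntegerIndexData data(/*section_id=*/0, /*document_id=*/0, /*key=*/2);
+  //   ICING_RETURN_IF_ERROR(serializer.PrependData(&pl_used, data));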
+ libtextclassifier3::Status PrependData(PostingListUsed* posting_list_used,
+ const IntegerIndexData& data) const;
+
+  // Prepend multiple IntegerIndexData to the posting list. Data should be
+  // sorted in ascending order (as defined by the less-than operator for
+  // IntegerIndexData).
+ // If keep_prepended is true, whatever could be prepended is kept, otherwise
+ // the posting list is reverted and left in its original state.
+ //
+ // RETURNS:
+  //   The number of data that have been prepended to the posting list. If
+  //   keep_prepended is false and the posting list was reverted, then it
+  //   returns 0.
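+  //
+  // Usage sketch (hypothetical values; serializer and pl_used as above):
+  //   std::vector<IntegerIndexData> batch = {
+  //       IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/1),
+  //       IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/3)};
+  //   ICING_ASSIGN_OR_RETURN(
+  //       uint32_t num_prepended,
+  //       serializer.PrependDataArray(&pl_used, batch.data(), batch.size(),
+  //                                   /*keep_prepended=*/false));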
+ libtextclassifier3::StatusOr<uint32_t> PrependDataArray(
+ PostingListUsed* posting_list_used, const IntegerIndexData* array,
+ uint32_t num_data, bool keep_prepended) const;
+
+ // Retrieves all data stored in the posting list.
+ //
+ // RETURNS:
+ // - On success, a vector of IntegerIndexData sorted by the reverse order of
+ // prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
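+  //
+  // Usage sketch (hypothetical; serializer and pl_used as above):
+  //   ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> all_data,
+  //                          serializer.GetData(&pl_used));
+  //   // all_data.front() is the most recently prepended data.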
+ libtextclassifier3::StatusOr<std::vector<IntegerIndexData>> GetData(
+ const PostingListUsed* posting_list_used) const;
+
+ // Same as GetData but appends data to data_arr_out.
+ //
+ // RETURNS:
+  //   - OK on success, and the IntegerIndexData, sorted in the reverse order
+  //       of prepending, will be appended to data_arr_out.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetData(
+ const PostingListUsed* posting_list_used,
+ std::vector<IntegerIndexData>* data_arr_out) const;
+
+  // Undo the last num_data prepended data. If num_data is greater than the
+  // number of data stored, then we clear all data.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
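+  //
+  // Usage sketch (hypothetical; undoes only the most recent prepend):
+  //   ICING_RETURN_IF_ERROR(serializer.PopFrontData(&pl_used, /*num_data=*/1));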
+ libtextclassifier3::Status PopFrontData(PostingListUsed* posting_list_used,
+ uint32_t num_data) const;
+
+  // Helper function to determine if the posting list is full.
+ bool IsFull(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ private:
+ // Posting list layout formats:
+ //
+ // NOT_FULL
+ // +-special-data-0--+-special-data-1--+------------+-----------------------+
+ // | | | | |
+ // |data-start-offset| Data::Invalid | 0x00000000 | (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+------------+-----------------------+
+ //
+ // ALMOST_FULL
+ // +-special-data-0--+-special-data-1--+-----+------------------------------+
+ // | | | | |
+ // | Data::Invalid | 1st data |(pad)| (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+-----+------------------------------+
+ //
+ // FULL
+ // +-special-data-0--+-special-data-1--+-----+------------------------------+
+ // | | | | |
+ // | 1st data | 2nd data |(pad)| (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+-----+------------------------------+
+ //
+ // The first two uncompressed (special) data also implicitly encode
+ // information about the size of the compressed data region.
+ //
+ // 1. If the posting list is NOT_FULL, then special_data_0 contains the byte
+ // offset of the start of the compressed data. Thus, the size of the
+ // compressed data is
+ // posting_list_used->size_in_bytes() - special_data_0.data_start_offset().
+ //
+ // 2. If posting list is ALMOST_FULL or FULL, then the compressed data region
+ // starts somewhere between
+ // [kSpecialDataSize, kSpecialDataSize + sizeof(IntegerIndexData) - 1] and
+ // ends at posting_list_used->size_in_bytes() - 1.
+ //
+ // EXAMPLE
+ // Posting list storage. Posting list size: 36 bytes
+ //
+ // EMPTY!
+ // +--- byte 0-11 ---+----- 12-23 -----+-------------- 24-35 ---------------+
+ // | | | |
+ // | 36 | Data::Invalid | 0x00000000 |
+ // | | | |
+ // +-----------------+-----------------+------------------------------------+
+ //
+ // Add IntegerIndexData(0x0FFFFCC3, 5)
+ // (DocumentId = 12, SectionId = 3; Key = 5)
+ // (VarInt64(5) is encoded as 10 (b'1010), requires 1 byte)
+ // NOT FULL!
+ // +--- byte 0-11 ---+----- 12-23 -----+------- 24-30 -------+--- 31-35 ----+
+ // | | | | 0x0FFFFCC3 |
+ // | 31 | Data::Invalid | 0x00000000 | VI64(5) |
+ // | | | | |
+ // +-----------------+-----------------+---------------------+--------------+
+ //
+ // Add IntegerIndexData(0x0FFFFB40, -2)
+ // (DocumentId = 18, SectionId = 0; Key = -2)
+ // (VarInt64(-2) is encoded as 3 (b'11), requires 1 byte)
+ // Previous IntegerIndexData BasicHit delta varint encoding:
+ // 0x0FFFFCC3 - 0x0FFFFB40 = 387, VarUnsignedInt(387) requires 2 bytes
+ // +--- byte 0-11 ---+----- 12-23 -----+-- 24-27 ---+--- 28-32 ----+ 33-35 -+
+ // | | | | 0x0FFFFB40 |VUI(387)|
+ // | 28 | Data::Invalid | 0x00 | VI64(-2) |VI64(5) |
+ // | | | | | |
+ // +-----------------+-----------------+------------+--------------+--------+
+ //
+ // Add IntegerIndexData(0x0FFFFA4A, 3)
+ // (DocumentId = 22, SectionId = 10; Key = 3)
+ // (VarInt64(3) is encoded as 6 (b'110), requires 1 byte)
+ // Previous IntegerIndexData BasicHit delta varint encoding:
+ // 0x0FFFFB40 - 0x0FFFFA4A = 246, VarUnsignedInt(246) requires 2 bytes
+ // +--- byte 0-11 ---+----- 12-23 -----+---+--- 25-29 ----+ 30-32 -+ 33-35 -+
+ // | | | | 0x0FFFFA4A |VUI(246)|VUI(387)|
+ // | 25 | Data::Invalid | | VI64(3) |VI64(-2)|VI64(5) |
+ // | | | | | | |
+ // +-----------------+-----------------+---+--------------+--------+--------+
+ //
+ // Add IntegerIndexData(0x0FFFFA01, -4)
+ // (DocumentId = 23, SectionId = 1; Key = -4)
+ // (No VarInt64 for key, since it is stored in special data section)
+ // Previous IntegerIndexData BasicHit delta varint encoding:
+  // 0x0FFFFA4A - 0x0FFFFA01 = 73, VarUnsignedInt(73) requires 1 byte
+ // ALMOST_FULL!
+ // +--- byte 0-11 ---+----- 12-23 -----+-- 24-27 ---+28-29+ 30-32 -+ 33-35 -+
+ // | | 0x0FFFFA01 | |(73) |VUI(246)|VUI(387)|
+ // | Data::Invalid | 0xFFFFFFFF | (pad) |(3) |VI64(-2)|VI64(5) |
+ // | | 0xFFFFFFFC | | | | |
+ // +-----------------+-----------------+------------+-----+--------+--------+
+ //
+ // Add IntegerIndexData(0x0FFFF904, 0)
+ // (DocumentId = 27, SectionId = 4; Key = 0)
+ // (No VarInt64 for key, since it is stored in special data section)
+ // Previous IntegerIndexData:
+ // Since 0x0FFFFA01 - 0x0FFFF904 = 253 and VarInt64(-4) is encoded as 7
+ // (b'111), it requires only 3 bytes after compression. It's able to fit
+ // into the padding section.
+ // Still ALMOST_FULL!
+ // +--- byte 0-11 ---+----- 12-23 -----+---+ 25-27 -+28-29+ 30-32 -+ 33-35 -+
+ // | | 0x0FFFF904 | |VUI(253)|(73) |VUI(246)|VUI(387)|
+ // | Data::Invalid | 0x00000000 | |VI64(-4)|(3) |VI64(-2)|VI64(5) |
+ // | | 0x00000000 | | | | | |
+ // +-----------------+-----------------+---+--------+-----+--------+--------+
+ //
+ // Add IntegerIndexData(0x0FFFF8C3, -1)
+ // (DocumentId = 28, SectionId = 3; Key = -1)
+ // (No VarInt64 for key, since it is stored in special data section)
+ // (No VarUnsignedInt for previous IntegerIndexData BasicHit)
+ // FULL!
+ // +--- byte 0-11 ---+----- 12-23 -----+---+ 25-27 -+28-29+ 30-32 -+ 33-35 -+
+ // | 0x0FFFF8C3 | 0x0FFFF904 | |VUI(253)|(73) |VUI(246)|VUI(387)|
+ // | 0xFFFFFFFF | 0x00000000 | |VI64(-4)|(3) |VI64(-2)|VI64(5) |
+ // | 0xFFFFFFFF | 0x00000000 | | | | | |
+ // +-----------------+-----------------+---+--------+-----+--------+--------+
+
+ // Helpers to determine what state the posting list is in.
+ bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
+ return !GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ bool IsEmpty(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset() ==
+ posting_list_used->size_in_bytes() &&
+ !GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+  // Returns false if both special data are invalid, or if the data start
+  // offset stored in the special data is less than kSpecialDataSize or
+  // greater than posting_list_used->size_in_bytes(). Returns true otherwise.
+ bool IsPostingListValid(const PostingListUsed* posting_list_used) const;
+
+ // Prepend data to a posting list that is in the ALMOST_FULL state.
+ //
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if data is not less than the previously added data.
+ libtextclassifier3::Status PrependDataToAlmostFull(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data) const;
+
+ // Prepend data to a posting list that is in the EMPTY state. This will always
+ // succeed because there are no pre-existing data and no validly constructed
+ // posting list could fail to fit one data.
+ void PrependDataToEmpty(PostingListUsed* posting_list_used,
+ const IntegerIndexData& data) const;
+
+ // Prepend data to a posting list that is in the NOT_FULL state.
+ //
+ // RETURNS:
+ // - OK, if successful
+ // - INVALID_ARGUMENT if data is not less than the previously added data.
+ libtextclassifier3::Status PrependDataToNotFull(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data,
+ uint32_t offset) const;
+
+ // Returns either 0 (FULL state), sizeof(IntegerIndexData) (ALMOST_FULL state)
+ // or a byte offset between kSpecialDataSize and
+ // posting_list_used->size_in_bytes() (inclusive) (NOT_FULL state).
+ uint32_t GetStartByteOffset(const PostingListUsed* posting_list_used) const;
+
+  // Sets special data 0 to properly reflect the start byte offset (see the
+  // layout comment for further details).
+  //
+  // Returns false if offset > posting_list_used->size_in_bytes() or if offset
+  // is in the range (sizeof(IntegerIndexData), kSpecialDataSize) or
+  // (0, sizeof(IntegerIndexData)). Returns true otherwise.
+ bool SetStartByteOffset(PostingListUsed* posting_list_used,
+ uint32_t offset) const;
+
+  // Helper for MoveFrom/GetData/PopFrontData. Adds limit data to out, or all
+  // data in the posting list if it contains fewer than limit data. out may be
+  // nullptr.
+ //
+ // NOTE: If called with limit=1, pop=true on a posting list that transitioned
+ // from NOT_FULL directly to FULL, GetDataInternal will not return the posting
+ // list to NOT_FULL. Instead it will leave it in a valid state, but it will be
+ // ALMOST_FULL.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetDataInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<IntegerIndexData>* out) const;
+
+ // Retrieves the value stored in the index-th special data.
+ //
+ // REQUIRES:
+ // 0 <= index < kNumSpecialData.
+ //
+ // RETURNS:
+ // - A valid SpecialData<IntegerIndexData>.
+ SpecialDataType GetSpecialData(const PostingListUsed* posting_list_used,
+ uint32_t index) const;
+
+ // Sets the value stored in the index-th special data to special_data.
+ //
+ // REQUIRES:
+ // 0 <= index < kNumSpecialData.
+ void SetSpecialData(PostingListUsed* posting_list_used, uint32_t index,
+ const SpecialDataType& special_data) const;
+
+ // Prepends data to the memory region [offset - sizeof(IntegerIndexData),
+ // offset - 1] and returns the new beginning of the region.
+ //
+ // RETURNS:
+ // - The new beginning of the padded region, if successful.
+  //   - INVALID_ARGUMENT if data will not fit (uncompressed) within
+  //     [kSpecialDataSize, offset - 1].
+ libtextclassifier3::StatusOr<uint32_t> PrependDataUncompressed(
+ PostingListUsed* posting_list_used, const IntegerIndexData& data,
+ uint32_t offset) const;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
diff --git a/icing/index/numeric/posting-list-integer-index-serializer_test.cc b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
new file mode 100644
index 0000000..716d1aa
--- /dev/null
+++ b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
@@ -0,0 +1,491 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+
+#include <memory>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/testing/common-matchers.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::Eq;
+using testing::IsEmpty;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// TODO(b/259743562): [Optimization 2] update unit tests after applying
+// compression. Remember to create varint/delta encoding
+// overflow (which causes state NOT_FULL -> FULL directly
+// without ALMOST_FULL) test cases, including for
+// PopFrontData.
+
+TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitNotNull) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 2551 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/0, /*key=*/2)),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
+ Eq(2 * sizeof(IntegerIndexData)));
+
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/1, /*key=*/5)),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
+ Eq(3 * sizeof(IntegerIndexData)));
+}
+
+TEST(PostingListIntegerIndexSerializerTest,
+ GetMinPostingListSizeToFitAlmostFull) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 3 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/0, /*key=*/2)),
+ IsOk());
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/1, /*key=*/5)),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitFull) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 3 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/0, /*key=*/2)),
+ IsOk());
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/1, /*key=*/5)),
+ IsOk());
+ ASSERT_THAT(serializer.PrependData(
+ &pl_used, IntegerIndexData(/*section_id=*/0,
+ /*document_id=*/2, /*key=*/0)),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, PrependDataNotFull) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 2551 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // Make used.
+ IntegerIndexData data0(/*section_id=*/0, /*document_id=*/0, /*key=*/2);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ // Size = sizeof(uncompressed data0)
+ int expected_size = sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+ IntegerIndexData data1(/*section_id=*/0, /*document_id=*/1, /*key=*/5);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ // Size = sizeof(uncompressed data1)
+ // + sizeof(uncompressed data0)
+ expected_size += sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ IntegerIndexData data2(/*section_id=*/0, /*document_id=*/2, /*key=*/0);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+ // Size = sizeof(uncompressed data2)
+ // + sizeof(uncompressed data1)
+ // + sizeof(uncompressed data0)
+ expected_size += sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data2, data1, data0)));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, PrependDataAlmostFull) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 4 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // Fill up the compressed region.
+ // Transitions:
+ // Adding data0: EMPTY -> NOT_FULL
+ // Adding data1: NOT_FULL -> NOT_FULL
+ IntegerIndexData data0(/*section_id=*/0, /*document_id=*/0, /*key=*/2);
+ IntegerIndexData data1(/*section_id=*/0, /*document_id=*/1, /*key=*/5);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ int expected_size = 2 * sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ // Add one more data to transition NOT_FULL -> ALMOST_FULL
+ IntegerIndexData data2(/*section_id=*/0, /*document_id=*/2, /*key=*/0);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+ expected_size = 3 * sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data2, data1, data0)));
+
+ // Add one more data to transition ALMOST_FULL -> FULL
+ IntegerIndexData data3(/*section_id=*/0, /*document_id=*/3, /*key=*/-3);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data3), IsOk());
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data3, data2, data1, data0)));
+
+ // The posting list is FULL. Adding another data should fail.
+ IntegerIndexData data4(/*section_id=*/0, /*document_id=*/4, /*key=*/100);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data4),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, PrependDataPostingListUsedMinSize) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // PL State: EMPTY
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(IsEmpty()));
+
+ // Add a data. PL should shift to ALMOST_FULL state
+ IntegerIndexData data0(/*section_id=*/0, /*document_id=*/0, /*key=*/2);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ // Size = sizeof(uncompressed data0)
+ int expected_size = sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+ // Add another data. PL should shift to FULL state.
+ IntegerIndexData data1(/*section_id=*/0, /*document_id=*/1, /*key=*/5);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ // Size = sizeof(uncompressed data1) + sizeof(uncompressed data0)
+ expected_size += sizeof(IntegerIndexData);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ // The posting list is FULL. Adding another data should fail.
+ IntegerIndexData data2(/*section_id=*/0, /*document_id=*/2, /*key=*/0);
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListIntegerIndexSerializerTest,
+ PrependDataArrayDoNotKeepPrepended) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 6 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<IntegerIndexData> data_in;
+ std::vector<IntegerIndexData> data_pushed;
+
+ // Add 3 data. The PL is in the empty state and should be able to fit all 3
+ // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() * sizeof(IntegerIndexData)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 2 data. The PL should transition from NOT_FULL to ALMOST_FULL.
+ data_in.clear();
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/3, /*key=*/-3));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/4, /*key=*/100));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() * sizeof(IntegerIndexData)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 2 data. The PL should remain ALMOST_FULL since the remaining space can
+ // only fit 1 data.
+ data_in.clear();
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/5, /*key=*/-200));
+ data_in.push_back(IntegerIndexData(/*section_id=*/0, /*document_id=*/6,
+ /*key=*/2147483647));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(0));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() * sizeof(IntegerIndexData)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 1 data. The PL should transition from ALMOST_FULL to FULL.
+ data_in.resize(1);
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() * sizeof(IntegerIndexData)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, PrependDataArrayKeepPrepended) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 6 * sizeof(IntegerIndexData);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<IntegerIndexData> data_in;
+ std::vector<IntegerIndexData> data_pushed;
+
+ // Add 3 data. The PL is in the empty state and should be able to fit all 3
+ // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/true),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() * sizeof(IntegerIndexData)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 4 data. The PL should prepend 3 data and transition from NOT_FULL to
+ // FULL.
+ data_in.clear();
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/3, /*key=*/-3));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/4, /*key=*/100));
+ data_in.push_back(
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/5, /*key=*/-200));
+ data_in.push_back(IntegerIndexData(/*section_id=*/0, /*document_id=*/6,
+ /*key=*/2147483647));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/true),
+ IsOkAndHolds(3));
+ data_in.resize(3);
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() * sizeof(IntegerIndexData)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<IntegerIndexData> data_arr1 = {
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5)};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr1.size()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<IntegerIndexData> data_arr2 = {
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/3, /*key=*/-3),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/4, /*key=*/100),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/5, /*key=*/-200)};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr2.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetData(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+ EXPECT_THAT(serializer.GetData(&pl_used1), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListIntegerIndexSerializerTest,
+ MoveToNullReturnsFailedPrecondition) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<IntegerIndexData> data_arr = {
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5)};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used, /*src=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/nullptr, /*src=*/&pl_used),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size1 = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size1));
+ std::vector<IntegerIndexData> data_arr1 = {
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/3, /*key=*/-3),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/4, /*key=*/100)};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr1.size()));
+
+ int size2 = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size2));
+ std::vector<IntegerIndexData> data_arr2 = {
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/5, /*key=*/-200)};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr2.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used1),
+ IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(data_arr2.rbegin(), data_arr2.rend())));
+}
+
+TEST(PostingListIntegerIndexSerializerTest, PopFrontData) {
+ PostingListIntegerIndexSerializer serializer;
+
+ int size = 2 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<IntegerIndexData> data_arr = {
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/0, /*key=*/2),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/1, /*key=*/5),
+ IntegerIndexData(/*section_id=*/0, /*document_id=*/2, /*key=*/0)};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr.size()));
+ ASSERT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+  // Now, pop the most recently prepended data. The posting list should
+  // contain the first two data.
+ EXPECT_THAT(serializer.PopFrontData(&pl_used, /*num_data=*/1), IsOk());
+ data_arr.pop_back();
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/posting-list-used.cc b/icing/index/posting-list-used.cc
deleted file mode 100644
index 708b13b..0000000
--- a/icing/index/posting-list-used.cc
+++ /dev/null
@@ -1,613 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/posting-list-used.h"
-
-#include <algorithm>
-#include <cinttypes>
-#include <cstdint>
-#include <limits>
-
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/index/posting-list-utils.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/legacy/index/icing-bit-util.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-
-uint32_t GetScoreByteSize(const Hit &hit) {
- return hit.has_score() ? sizeof(Hit::Score) : 0;
-}
-
-} // namespace
-
-libtextclassifier3::StatusOr<PostingListUsed>
-PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
- void *posting_list_buffer, uint32_t size_in_bytes) {
- ICING_RETURN_ERROR_IF_NULL(posting_list_buffer);
- if (!posting_list_utils::IsValidPostingListSize(size_in_bytes)) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Requested posting list size %d is invalid!", size_in_bytes));
- }
- return PostingListUsed(posting_list_buffer, size_in_bytes);
-}
-
-libtextclassifier3::StatusOr<PostingListUsed>
-PostingListUsed::CreateFromUnitializedRegion(void *posting_list_buffer,
- uint32_t size_in_bytes) {
- ICING_ASSIGN_OR_RETURN(PostingListUsed posting_list_used,
- CreateFromPreexistingPostingListUsedRegion(
- posting_list_buffer, size_in_bytes));
- posting_list_used.Clear();
- return posting_list_used;
-}
-
-void PostingListUsed::Clear() { set_start_byte_offset(size_in_bytes_); }
-
-libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) {
- ICING_RETURN_ERROR_IF_NULL(other);
- if (other->MinPostingListSizeToFit() > size_in_bytes_) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "other->MinPostingListSizeToFit %d must be larger than size %d.",
- other->MinPostingListSizeToFit(), size_in_bytes_));
- }
-
- if (!IsPostingListValid()) {
- return absl_ports::FailedPreconditionError(
- "This posting list is in an invalid state and can't be used!");
- }
- if (other->IsPostingListValid()) {
- return absl_ports::InvalidArgumentError(
- "Cannot MoveFrom an invalid posting list!");
- }
-
- // Pop just enough hits that all of other's compressed hits fit in
- // this posting_list's compressed area. Then we can memcpy that area.
- std::vector<Hit> hits;
- while (other->full() || other->almost_full() ||
- (size_in_bytes_ - posting_list_utils::kSpecialHitsSize <
- other->BytesUsed())) {
- if (other->GetHitsInternal(/*limit=*/1, /*pop=*/true, &hits) != 1) {
- return absl_ports::AbortedError(
- "Unable to retrieve hits from other posting list.");
- }
- }
-
- // memcpy the area and set up start byte offset.
- Clear();
- memcpy(posting_list_buffer_ + size_in_bytes_ - other->BytesUsed(),
- other->posting_list_buffer_ + other->get_start_byte_offset(),
- other->BytesUsed());
- // Because we popped all hits from other outside of the compressed area and we
- // guaranteed that other->BytesUsed is less than size_in_bytes_ -
- // kSpecialHitSize. This is guaranteed to be a valid byte offset for the
- // NOT_FULL state.
- set_start_byte_offset(size_in_bytes_ - other->BytesUsed());
-
- // Put back remaining hits.
- for (size_t i = 0; i < hits.size(); i++) {
- const Hit &hit = hits[hits.size() - i - 1];
- // PrependHit can return either INVALID_ARGUMENT - if hit is invalid or not
- // less than the previous hit - or RESOURCE_EXHAUSTED. RESOURCE_EXHAUSTED
- // should be impossible because we've already assured that there is enough
- // room above.
- ICING_RETURN_IF_ERROR(PrependHit(hit));
- }
-
- other->Clear();
- return libtextclassifier3::Status::OK;
-}
-
-uint32_t PostingListUsed::GetPadEnd(uint32_t offset) const {
- Hit::Value pad;
- uint32_t pad_end = offset;
- while (pad_end < size_in_bytes_) {
- size_t pad_len = VarInt::Decode(posting_list_buffer_ + pad_end, &pad);
- if (pad != 0) {
- // No longer a pad.
- break;
- }
- pad_end += pad_len;
- }
- return pad_end;
-}
-
-void PostingListUsed::PadToEnd(uint32_t start, uint32_t end) {
- if (end > size_in_bytes_) {
- ICING_LOG(ERROR) << "Cannot pad a region that ends after size!";
- return;
- }
- // In VarInt a value of 0 encodes to 0.
- memset(posting_list_buffer_ + start, 0, end - start);
-}
-
-libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull(
- const Hit &hit) {
- // Get delta between first hit and the new hit. Try to fit delta
- // in the padded area and put new hit at the special position 1.
- Hit cur = get_special_hit(1);
- if (cur.value() <= hit.value()) {
- return absl_ports::InvalidArgumentError(
- "Hit being prepended must be strictly less than the most recent Hit");
- }
- uint64_t delta = cur.value() - hit.value();
- uint8_t delta_buf[VarInt::kMaxEncodedLen64];
- size_t delta_len = VarInt::Encode(delta, delta_buf);
- uint32_t cur_score_bytes = GetScoreByteSize(cur);
-
- uint32_t pad_end = GetPadEnd(posting_list_utils::kSpecialHitsSize);
-
- if (pad_end >=
- posting_list_utils::kSpecialHitsSize + delta_len + cur_score_bytes) {
- // Pad area has enough space for delta and score of existing hit
- // (cur). Write delta at pad_end - delta_len - cur_score_bytes.
- uint8_t *delta_offset =
- posting_list_buffer_ + pad_end - delta_len - cur_score_bytes;
- memcpy(delta_offset, delta_buf, delta_len);
- // Now copy score.
- Hit::Score score = cur.score();
- uint8_t *score_offset = delta_offset + delta_len;
- memcpy(score_offset, &score, cur_score_bytes);
-
- // Now first hit is the new hit, at special position 1.
- set_special_hit(1, hit);
- set_start_byte_offset(sizeof(Hit));
- } else {
- // No space for delta. We put the new hit at special position 0
- // and go to the full state.
- set_special_hit(0, hit);
- }
- return libtextclassifier3::Status::OK;
-}
-
-void PostingListUsed::PrependHitToEmpty(const Hit &hit) {
- // First hit to be added. Just add verbatim, no compression.
- if (size_in_bytes_ == posting_list_utils::kSpecialHitsSize) {
- set_special_hit(1, hit);
- set_start_byte_offset(sizeof(Hit));
- } else {
- // Since this is the first hit, size != kSpecialHitsSize and
- // size % sizeof(Hit) == 0, we know that there is room to fit 'hit' into
- // the compressed region.
- uint32_t offset = PrependHitUncompressed(hit, size_in_bytes_);
- set_start_byte_offset(offset);
- }
-}
-
-libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
- const Hit &hit, uint32_t offset) {
- // First hit in compressed area. It is uncompressed. See if delta
- // between the first hit and new hit will still fit in the
- // compressed area.
- if (offset + sizeof(Hit::Value) > size_in_bytes_) {
- // The first hit in the compressed region *should* be uncompressed, but
- // somehow there isn't enough room between offset and the end of the
- // compressed area to fit an uncompressed hit. This should NEVER happen.
- return absl_ports::FailedPreconditionError(
- "Posting list is in an invalid state.");
- }
- Hit::Value cur_value;
- memcpy(&cur_value, posting_list_buffer_ + offset, sizeof(Hit::Value));
- if (cur_value <= hit.value()) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Hit %d being prepended must be strictly less than the most recent "
- "Hit %d",
- hit.value(), cur_value));
- }
- uint64_t delta = cur_value - hit.value();
- uint8_t delta_buf[VarInt::kMaxEncodedLen64];
- size_t delta_len = VarInt::Encode(delta, delta_buf);
- uint32_t hit_score_bytes = GetScoreByteSize(hit);
-
- // offset now points to one past the end of the first hit.
- offset += sizeof(Hit::Value);
- if (posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value) + delta_len +
- hit_score_bytes <=
- offset) {
- // Enough space for delta in compressed area.
-
- // Prepend delta.
- offset -= delta_len;
- memcpy(posting_list_buffer_ + offset, delta_buf, delta_len);
-
- // Prepend new hit with (possibly) its score. We know that there is room
- // for 'hit' because of the if statement above.
- offset = PrependHitUncompressed(hit, offset);
- // offset is guaranteed to be valid here. The if above will guarantee that
- // offset >= kSpecialHitSize and < size_in_bytes_ because the if ensures
- // that there is enough room between offset and kSpecialHitSize to fit the
- // delta of the previous hit, any score and the uncompressed hit.
- set_start_byte_offset(offset);
- } else if (posting_list_utils::kSpecialHitsSize + delta_len <= offset) {
- // Only have space for delta. The new hit must be put in special
- // position 1.
-
- // Prepend delta.
- offset -= delta_len;
- memcpy(posting_list_buffer_ + offset, delta_buf, delta_len);
-
- // Prepend pad.
- PadToEnd(posting_list_utils::kSpecialHitsSize, offset);
-
- // Put new hit in special position 1.
- set_special_hit(1, hit);
-
- // State almost_full.
- set_start_byte_offset(sizeof(Hit));
- } else {
- // Very rare case where delta is larger than sizeof(Hit::Value)
- // (i.e. varint delta encoding expanded required storage). We
- // move first hit to special position 1 and put new hit in
- // special position 0.
- Hit cur(cur_value);
- if (cur.has_score()) {
- cur = Hit(cur_value, ReadScore(offset));
- offset += sizeof(Hit::Score);
- }
- PadToEnd(posting_list_utils::kSpecialHitsSize, offset);
- set_special_hit(1, cur);
- set_special_hit(0, hit);
- }
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status PostingListUsed::PrependHit(const Hit &hit) {
- static_assert(sizeof(Hit::Value) <= sizeof(uint64_t),
- "Hit::Value cannot be larger than 8 bytes because the delta "
- "must be able to fit in 8 bytes.");
- if (!hit.is_valid()) {
- return absl_ports::InvalidArgumentError("Cannot prepend an invalid hit!");
- }
- if (!IsPostingListValid()) {
- return absl_ports::FailedPreconditionError(
- "This PostingListUsed is in an invalid state and can't add any hits!");
- }
-
- if (full()) {
- // State full: no space left.
- return absl_ports::ResourceExhaustedError("No more room for hits");
- } else if (almost_full()) {
- return PrependHitToAlmostFull(hit);
- } else if (empty()) {
- PrependHitToEmpty(hit);
- return libtextclassifier3::Status::OK;
- } else {
- uint32_t offset = get_start_byte_offset();
- return PrependHitToNotFull(hit, offset);
- }
-}
-
-std::vector<Hit> PostingListUsed::GetHits() const {
- std::vector<Hit> hits_out;
- GetHits(&hits_out);
- return hits_out;
-}
-
-void PostingListUsed::GetHits(std::vector<Hit> *hits_out) const {
- GetHitsInternal(/*limit=*/std::numeric_limits<uint32_t>::max(), /*pop=*/false,
- hits_out);
-}
-
-void PostingListUsed::PopFrontHits(uint32_t num_hits) {
- if (num_hits == 1 && full()) {
- // The PL is in full status which means that we save 2 uncompressed hits in
- // the 2 special postions. But full status may be reached by 2 different
- // statuses.
- // (1) In "almost full" status
- // +-----------------+----------------+-------+-----------------+
- // |Hit::kInvalidVal |1st hit |(pad) |(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- // When we prepend another hit, we can only put it at the special
- // position 0. And we get a full PL
- // +-----------------+----------------+-------+-----------------+
- // |new 1st hit |original 1st hit|(pad) |(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- // (2) In "not full" status
- // +-----------------+----------------+------+-------+------------------+
- // |hits-start-offset|Hit::kInvalidVal|(pad) |1st hit|(compressed) hits |
- // +-----------------+----------------+------+-------+------------------+
- // When we prepend another hit, we can reach any of the 3 following
- // scenarios:
- // (2.1) not full
- // if the space of pad and original 1st hit can accommodate the new 1st hit
- // and the encoded delta value.
- // +-----------------+----------------+------+-----------+-----------------+
- // |hits-start-offset|Hit::kInvalidVal|(pad) |new 1st hit|(compressed) hits|
- // +-----------------+----------------+------+-----------+-----------------+
- // (2.2) almost full
- // If the space of pad and original 1st hit cannot accommodate the new 1st
- // hit and the encoded delta value but can accommodate the encoded delta
- // value only. We can put the new 1st hit at special position 1.
- // +-----------------+----------------+-------+-----------------+
- // |Hit::kInvalidVal |new 1st hit |(pad) |(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- // (2.3) full
- // In very rare case, it cannot even accommodate only the encoded delta
- // value. we can move the original 1st hit into special position 1 and the
- // new 1st hit into special position 0. This may happen because we use
- // VarInt encoding method which may make the encoded value longer (about
- // 4/3 times of original)
- // +-----------------+----------------+-------+-----------------+
- // |new 1st hit |original 1st hit|(pad) |(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- // Suppose now the PL is full. But we don't know whether it arrived to
- // this status from "not full" like (2.3) or from "almost full" like (1).
- // We'll return to "almost full" status like (1) if we simply pop the new
- // 1st hit but we want to make the prepending operation "reversible". So
- // there should be some way to return to "not full" if possible. A simple
- // way to do it is to pop 2 hits out of the PL to status "almost full" or
- // "not full". And add the original 1st hit back. We can return to the
- // correct original statuses of (2.1) or (1). This makes our prepending
- // operation reversible.
- std::vector<Hit> out;
-
- // Popping 2 hits should never fail because we've just ensured that the
- // posting list is in the FULL state.
- GetHitsInternal(/*limit=*/2, /*pop=*/true, &out);
-
- // PrependHit should never fail because out[1] is a valid hit less than
- // previous hits in the posting list and because there's no way that the
- // posting list could run out of room because it previously stored this hit
- // AND another hit.
- PrependHit(out[1]);
- } else if (num_hits > 0) {
- GetHitsInternal(/*limit=*/num_hits, /*pop=*/true, nullptr);
- }
-}
-
-uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop,
- std::vector<Hit> *out) const {
- // Put current uncompressed val here.
- Hit::Value val = Hit::kInvalidValue;
- uint32_t offset = get_start_byte_offset();
- uint32_t count = 0;
-
- // First traverse the first two special positions.
- while (count < limit && offset < posting_list_utils::kSpecialHitsSize) {
- Hit hit = get_special_hit(offset / sizeof(Hit));
- val = hit.value();
- if (out != nullptr) {
- out->push_back(hit);
- }
- offset += sizeof(Hit);
- count++;
- }
-
- // If special position 1 was set then we need to skip padding.
- if (val != Hit::kInvalidValue &&
- offset == posting_list_utils::kSpecialHitsSize) {
- offset = GetPadEnd(offset);
- }
-
- while (count < limit && offset < size_in_bytes_) {
- if (val == Hit::kInvalidValue) {
- // First hit is in compressed area. Put that in val.
- memcpy(&val, posting_list_buffer_ + offset, sizeof(Hit::Value));
- offset += sizeof(Hit::Value);
- } else {
- // Now we have delta encoded subsequent hits. Decode and push.
- uint64_t delta;
- offset += VarInt::Decode(posting_list_buffer_ + offset, &delta);
- val += delta;
- }
- Hit hit(val);
- if (hit.has_score()) {
- hit = Hit(val, ReadScore(offset));
- offset += sizeof(Hit::Score);
- }
- if (out != nullptr) {
- out->push_back(hit);
- }
- count++;
- }
-
- if (pop) {
- PostingListUsed *mutable_this = const_cast<PostingListUsed *>(this);
- // Modify the posting list so that we pop all hits actually
- // traversed.
- if (offset >= posting_list_utils::kSpecialHitsSize &&
- offset < size_in_bytes_) {
- // In the compressed area. Pop and reconstruct. offset/val is
- // the last traversed hit, which we must discard. So move one
- // more forward.
- uint64_t delta;
- offset += VarInt::Decode(posting_list_buffer_ + offset, &delta);
- val += delta;
-
- // Now val is the first hit of the new posting list.
- if (posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value) <= offset) {
- // val fits in compressed area. Simply copy.
- offset -= sizeof(Hit::Value);
- memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value));
- } else {
- // val won't fit in compressed area. Also see if there is a
- // score.
- Hit hit(val);
- if (hit.has_score()) {
- hit = Hit(val, ReadScore(offset));
- }
- mutable_this->set_special_hit(1, hit);
- mutable_this->PadToEnd(posting_list_utils::kSpecialHitsSize, offset);
- offset = sizeof(Hit);
- }
- }
- // offset is guaranteed to be valid. It falls into one of four scenarios:
- // Scenario 1: the above if was false because offset is not < size_in_bytes_
- // In this case, offset must be == size_in_bytes_ because we reached
- // offset by unwinding hits on the posting list.
-  // Scenario 2: offset is < kSpecialHitsSize
- // In this case, offset is guaranteed to be either 0 or sizeof(Hit)
- // because offset is incremented by sizeof(Hit) within the first while
- // loop.
- // Scenario 3: offset is within the compressed region and the new first hit
- // in the posting list (the value that 'val' holds) will fit as an
- // uncompressed hit in the compressed region. The resulting offset from
-  //   decompressing val must be >= kSpecialHitsSize because otherwise we'd
-  //   be in Scenario 4.
-  // Scenario 4: offset is within the compressed region, but the new first hit
-  //   in the posting list is too large to fit as an uncompressed hit in the
-  //   compressed region. Therefore, it must be stored in a special hit and
-  //   offset will be sizeof(Hit).
- mutable_this->set_start_byte_offset(offset);
- }
-
- return count;
-}
-
-Hit PostingListUsed::get_special_hit(uint32_t index) const {
- static_assert(sizeof(Hit::Value) >= sizeof(uint32_t), "HitTooSmall");
- if (index >= posting_list_utils::kSpecialHitsSize / sizeof(Hit)) {
- ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1";
- return Hit();
- }
- Hit val;
- memcpy(&val, posting_list_buffer_ + index * sizeof(val), sizeof(val));
- return val;
-}
-
-void PostingListUsed::set_special_hit(uint32_t index, const Hit &val) {
- if (index >= posting_list_utils::kSpecialHitsSize / sizeof(Hit)) {
- ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1";
- return;
- }
- memcpy(posting_list_buffer_ + index * sizeof(val), &val, sizeof(val));
-}
-
-uint32_t PostingListUsed::BytesUsed() const {
- // The special hits will be included if they represent actual hits. If they
- // represent the hit offset or the invalid hit sentinel, they are not
- // included.
- return size_in_bytes_ - get_start_byte_offset();
-}
-
-uint32_t PostingListUsed::MinPostingListSizeToFit() const {
- if (full() || almost_full()) {
- // If in either the FULL state or ALMOST_FULL state, this posting list *is*
- // the minimum size posting list that can fit these hits. So just return the
- // size of the posting list.
- return size_in_bytes_;
- }
-
-  // In the NOT_FULL state, BytesUsed() includes no special hits. The
-  // minimum-sized posting list that would be guaranteed to fit these hits is
-  // ALMOST_FULL, with kInvalidHit in special_hit(0), the uncompressed Hit in
- // special_hit(1) and the n compressed hits in the compressed region.
- // BytesUsed contains one uncompressed Hit and n compressed hits. Therefore,
- // fitting these hits into a posting list would require BytesUsed plus one
- // extra hit.
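-  //
-  // Worked example (assuming sizeof(Hit) == 5, as in the layout example): a
-  // NOT_FULL list holding one uncompressed Hit::Value (4 bytes) and two
-  // 2-byte deltas has BytesUsed() == 8, so the minimum posting list that
-  // fits these hits in the ALMOST_FULL layout is 8 + sizeof(Hit) = 13 bytes.
-  // Callers may still need to round that up to a multiple of sizeof(Hit) to
-  // create a valid posting list.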
- return BytesUsed() + sizeof(Hit);
-}
-
-bool PostingListUsed::IsPostingListValid() const {
- if (almost_full()) {
- // Special Hit 1 should hold a Hit.
- if (!get_special_hit(1).is_valid()) {
- ICING_LOG(ERROR)
- << "Both special hits cannot be invalid at the same time.";
- return false;
- }
- } else if (!full()) {
- // NOT_FULL. Special Hit 0 should hold a valid offset.
- if (get_special_hit(0).value() > size_in_bytes_ ||
- get_special_hit(0).value() < posting_list_utils::kSpecialHitsSize) {
- ICING_LOG(ERROR) << "Hit: " << get_special_hit(0).value()
- << " size: " << size_in_bytes_
- << " sp size: " << posting_list_utils::kSpecialHitsSize;
- return false;
- }
- }
- return true;
-}
-
-uint32_t PostingListUsed::get_start_byte_offset() const {
- if (full()) {
- return 0;
- } else if (almost_full()) {
- return sizeof(Hit);
- } else {
- // NOT_FULL
- return get_special_hit(0).value();
- }
-}
-
-void PostingListUsed::set_start_byte_offset(uint32_t offset) {
- if (offset > size_in_bytes_) {
- ICING_LOG(ERROR) << "offset cannot be a value greater than size "
- << size_in_bytes_ << ". offset is " << offset << ".";
- return;
- }
- if (offset < posting_list_utils::kSpecialHitsSize && offset > sizeof(Hit)) {
- ICING_LOG(ERROR) << "offset cannot be a value between (" << sizeof(Hit)
- << ", " << posting_list_utils::kSpecialHitsSize
- << "). offset is " << offset << ".";
- return;
- }
- if (offset < sizeof(Hit) && offset != 0) {
- ICING_LOG(ERROR) << "offset cannot be a value between (0, " << sizeof(Hit)
- << "). offset is " << offset << ".";
- return;
- }
- if (offset >= posting_list_utils::kSpecialHitsSize) {
- // not_full state.
- set_special_hit(0, Hit(offset));
- set_special_hit(1, Hit());
- } else if (offset == sizeof(Hit)) {
- // almost_full state.
- set_special_hit(0, Hit());
- }
- // Nothing to do for the FULL state - the offset isn't actually stored
- // anywhere and both special hits hold valid hits.
-}
-
-uint32_t PostingListUsed::PrependHitUncompressed(const Hit &hit,
- uint32_t offset) {
- if (hit.has_score()) {
- if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit)) {
- ICING_LOG(ERROR) << "Not enough room to prepend Hit at offset " << offset
- << ".";
- return offset;
- }
- offset -= sizeof(Hit);
- memcpy(posting_list_buffer_ + offset, &hit, sizeof(Hit));
- } else {
- if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value)) {
- ICING_LOG(ERROR) << "Not enough room to prepend Hit::Value at offset "
- << offset << ".";
- return offset;
- }
- offset -= sizeof(Hit::Value);
- Hit::Value val = hit.value();
- memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value));
- }
- return offset;
-}
-
-Hit::Score PostingListUsed::ReadScore(uint32_t offset) const {
- if (offset + sizeof(Hit::Score) > size_in_bytes_) {
- ICING_LOG(FATAL)
- << "offset " << offset
- << " must not point past the end of the posting list of size "
- << size_in_bytes_ << ".";
- }
- Hit::Score score;
- memcpy(&score, posting_list_buffer_ + offset, sizeof(Hit::Score));
- return score;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/index/posting-list-used.h b/icing/index/posting-list-used.h
deleted file mode 100644
index 492435b..0000000
--- a/icing/index/posting-list-used.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_INDEX_POSTING_LIST_USED_H_
-#define ICING_INDEX_POSTING_LIST_USED_H_
-
-#include <string.h>
-#include <sys/mman.h>
-
-#include <algorithm>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/index/hit/hit.h"
-#include "icing/index/posting-list-utils.h"
-#include "icing/util/logging.h"
-
-namespace icing {
-namespace lib {
-
-// A posting list with hits in it. Layout described in comments in
-// posting-list-used.cc.
-class PostingListUsed {
- public:
- // Creates a PostingListUsed that points to a buffer of size_in_bytes bytes.
- // 'Preexisting' means that posting_list_buffer was previously modified by
- // another instance of PostingListUsed.
- //
- // Caller owns the hits buffer and must not free it while using a
- // PostingListUsed.
- //
- // RETURNS:
- // - A valid PostingListUsed if successful
- // - INVALID_ARGUMENT if size_in_bytes < min_posting_list_size()
- // || size_in_bytes % sizeof(Hit) != 0.
- // - FAILED_PRECONDITION if posting_list_buffer is null
- static libtextclassifier3::StatusOr<PostingListUsed>
- CreateFromPreexistingPostingListUsedRegion(void *posting_list_buffer,
- uint32_t size_in_bytes);
-
- // Creates a PostingListUsed that points to a buffer of size_in_bytes bytes
- // and initializes the content of the buffer so that the returned
- // PostingListUsed is empty.
- //
- // Caller owns the posting_list_buffer buffer and must not free it while using
- // a PostingListUsed.
- //
- // RETURNS:
- // - A valid PostingListUsed if successful
- // - INVALID_ARGUMENT if size_in_bytes < min_posting_list_size()
- // || size_in_bytes % sizeof(Hit) != 0.
- // - FAILED_PRECONDITION if posting_list_buffer is null
- static libtextclassifier3::StatusOr<PostingListUsed>
- CreateFromUnitializedRegion(void *posting_list_buffer,
- uint32_t size_in_bytes);
-
- // Move contents from another posting list. Clears other.
- //
- // RETURNS:
- // - OK, if successful
- // - INVALID_ARGUMENT if 'other' is not valid or 'other' is too large to fit
- // in 'this'.
- // - FAILED_PRECONDITION if 'this' posting list is in a corrupted state.
- libtextclassifier3::Status MoveFrom(PostingListUsed *other);
-
- // Min size of posting list that can fit these used bytes. (See
- // MoveFrom.)
- uint32_t MinPostingListSizeToFit() const;
-
- // Prepend a hit to the posting list.
- // RETURNS:
- // - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the
- // previously added hit.
- // - RESOURCE_EXHAUSTED if there is no more room to add hit to the posting
- // list.
- libtextclassifier3::Status PrependHit(const Hit &hit);
-
- // Prepend hits to the posting list. Hits should be sorted in
- // descending order (as defined by the less than operator for Hit)
- //
- // Returns the number of hits that could be prepended to the posting list. If
- // keep_prepended is true, whatever could be prepended is kept, otherwise the
- // posting list is left in its original state.
- template <class T, Hit (*GetHit)(const T &)>
- uint32_t PrependHitArray(const T *array, uint32_t num_hits,
- bool keep_prepended);
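-
-  // Illustrative usage of PrependHitArray (sketch only; HitElt here mirrors
-  // the helper struct of the same name in posting-list-used_test.cc):
-  //
-  //   struct HitElt {
-  //     static Hit get_hit(const HitElt &elt) { return elt.hit; }
-  //     Hit hit;
-  //   };
-  //   uint32_t num_prepended = pl.PrependHitArray<HitElt, HitElt::get_hit>(
-  //       elts, num_elts, /*keep_prepended=*/false);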
-
- // Return hits sorted by the reverse order of prepending.
- std::vector<Hit> GetHits() const;
-
- // Same as GetHits but appends hits to hits_out.
- void GetHits(std::vector<Hit> *hits_out) const;
-
-  // Undo the last num_hits hits prepended. If num_hits is greater than the
-  // number of hits in the posting list, all hits are cleared.
- void PopFrontHits(uint32_t num_hits);
-
- // Returns bytes used by actual hits.
- uint32_t BytesUsed() const;
-
- private:
- // Posting list layout formats:
- //
- // not_full
- //
- // +-----------------+----------------+-------+-----------------+
- // |hits-start-offset|Hit::kInvalidVal|xxxxxxx|(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- //
- // almost_full
- //
- // +-----------------+----------------+-------+-----------------+
- // |Hit::kInvalidVal |1st hit |(pad) |(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- //
- // full()
- //
- // +-----------------+----------------+-------+-----------------+
- // |1st hit |2nd hit |(pad) |(compressed) hits|
- // +-----------------+----------------+-------+-----------------+
- //
- // The first two uncompressed hits also implicitly encode information about
- // the size of the compressed hits region.
- //
- // 1. If the posting list is NOT_FULL, then
- // posting_list_buffer_[0] contains the byte offset of the start of the
- // compressed hits - and, thus, the size of the compressed hits region is
- // size_in_bytes - posting_list_buffer_[0].
- //
- // 2. If posting list is ALMOST_FULL or FULL, then the compressed hits region
- // starts somewhere between [kSpecialHitsSize, kSpecialHitsSize + sizeof(Hit)
- // - 1] and ends at size_in_bytes - 1.
- //
- // Hit scores are stored after the hit value, compressed or
- // uncompressed. For the first two special hits, we always have a
-  // space for the score. For hits in the compressed area, we only have
-  // the score following the hit value if hit.has_score() is true. This
- // allows good compression in the common case where hits don't have a
- // specific score.
- //
- // EXAMPLE
- // Posting list storage. Posting list size: 20 bytes
- // EMPTY!
- // +--bytes 0-4--+----- 5-9 ------+---------------- 10-19 -----------------+
- // | 20 |Hit::kInvalidVal| 0x000 |
-  // +-------------+----------------+-----------------------------------------+
- //
- // Add Hit 0x07FFF998 (DocumentId = 12, SectionId = 3, Flags = 0)
- // NOT FULL!
- // +--bytes 0-4--+----- 5-9 ------+----- 10-15 -----+-------- 16-19 -------+
- // | 16 |Hit::kInvalidVal| 0x000 | 0x07FFF998 |
- // +-------------+----------------+-----------------+----------------------+
- //
- // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4, Score=125)
- // (Hit 0x07FFF998 - Hit 0x07FFF684 = 788)
- // +--bytes 0-4--+----- 5-9 ------+-- 10-12 --+-- 13-16 --+- 17 -+-- 18-19 --+
- // | 13 |Hit::kInvalidVal| 0x000 | 0x07FFF684| 125 | 788 |
- // +-------------+----------------+-----------+-----------+------+-----------+
- //
- // Add Hit 0x07FFF4D2 (DocumentId = 22, SectionId = 10, Flags = 2)
- // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 434)
- // +--bytes 0-4--+--- 5-9 ----+-- 10 --+-- 11-14 -+- 15-16 -+- 17 -+- 18-19 -+
- // | 9 |Hit::kInvVal| 0x00 |0x07FFF4D2| 434 | 125 | 788 |
- // +-------------+------------+--------+----------+---------+------+---------+
- //
-  // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6, Score = 87)
-  // (Hit 0x07FFF4D2 - Hit 0x07FFF40E = 196)
- // ALMOST FULL!
- // +--bytes 0-4-+---- 5-9 ----+- 10-12 -+- 13-14 -+- 15-16 -+- 17 -+- 18-19 -+
- // |Hit::kInvVal|0x07FFF40E,87| 0x000 | 196 | 434 | 125 | 788 |
- // +-------------+------------+---------+---------+---------+------+---------+
- //
- // Add Hit 0x07FFF320 (DocumentId = 27, SectionId = 4, Flags = 0)
- // FULL!
-  // +--bytes 0-4--+---- 5-9 ----+- 10-13 -+-- 14-15 -+- 16-17 -+- 18 -+- 19-20 -+
-  // | 0x07FFF320 |0x07FFF40E,87| 0x000 | 196 | 434 | 125 | 788 |
-  // +-------------+-------------+---------+----------+---------+------+---------+
- PostingListUsed(void *posting_list_buffer, uint32_t size_in_bytes)
- : posting_list_buffer_(static_cast<uint8_t *>(posting_list_buffer)),
- size_in_bytes_(size_in_bytes) {}
-
- // Helpers to determine what state the posting list is in.
- bool full() const {
- return get_special_hit(0).is_valid() && get_special_hit(1).is_valid();
- }
- bool almost_full() const { return !get_special_hit(0).is_valid(); }
- bool empty() const {
- return get_special_hit(0).value() == size_in_bytes_ &&
- !get_special_hit(1).is_valid();
- }
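-
-  // State summary, as encoded by the two special hits (see the helpers
-  // above):
-  //   special_hit(0) valid and special_hit(1) valid              -> FULL
-  //   special_hit(0) invalid                                     -> ALMOST_FULL
-  //   special_hit(0) == size_in_bytes_, special_hit(1) invalid   -> EMPTY
-  //   otherwise, special_hit(0) holds the hits-start-offset      -> NOT_FULL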
-
- // Returns false if both special hits are invalid or if the offset value
- // stored in the special hit is less than kSpecialHitsSize or greater than
- // size_in_bytes_. Returns true, otherwise.
- bool IsPostingListValid() const;
-
- // Prepend hit to a posting list that is in the ALMOST_FULL state.
- // RETURNS:
- // - OK, if successful
- // - INVALID_ARGUMENT if hit is not less than the previously added hit.
- libtextclassifier3::Status PrependHitToAlmostFull(const Hit &hit);
-
- // Prepend hit to a posting list that is in the EMPTY state. This will always
- // succeed because there are no pre-existing hits and no validly constructed
- // posting list could fail to fit one hit.
- void PrependHitToEmpty(const Hit &hit);
-
- // Prepend hit to a posting list that is in the NOT_FULL state.
- // RETURNS:
- // - OK, if successful
- // - INVALID_ARGUMENT if hit is not less than the previously added hit.
- libtextclassifier3::Status PrependHitToNotFull(const Hit &hit,
- uint32_t offset);
-
- // Reset contents to an empty posting list. This *must* be called if the
- // posting_list_buffer_ region is uninitialized.
- void Clear();
-
- // Returns either 0 (full state), sizeof(Hit) (almost_full state) or
- // a byte offset between kSpecialHitsSize and size_in_bytes_ (inclusive)
- // (not_full state).
- uint32_t get_start_byte_offset() const;
-
- // Sets the special hits to properly reflect what offset is (see layout
- // comment for further details).
-  // If offset > size_in_bytes_, or offset is in (sizeof(Hit),
-  // kSpecialHitsSize), or offset is in (0, sizeof(Hit)), then offset is
-  // considered invalid and this function has no effect.
- void set_start_byte_offset(uint32_t offset);
-
- // Manipulate padded areas. We never store the same hit value twice
- // so a delta of 0 is a pad byte.
-
- // Returns offset of first non-pad byte.
- uint32_t GetPadEnd(uint32_t offset) const;
-
- // Fill padding between offset start and offset end with 0s. If end >
- // size_in_bytes_, this function has no effect.
- void PadToEnd(uint32_t start, uint32_t end);
-
-  // Helper for GetHits/PopFrontHits. Returns the number actually traversed (also
- // the size of out if non-NULL), which will always be equal to 'limit' unless
- // there are fewer than 'limit' hits in the posting list. out can be NULL.
- //
- // NOTE: If called with limit=1, pop=true on a posting list that transitioned
- // from NOT_FULL directly to FULL, GetHitsInternal will not return the posting
- // list to NOT_FULL. Instead it will leave it in a valid state, but it will be
- // ALMOST_FULL.
- uint32_t GetHitsInternal(uint32_t limit, bool pop,
- std::vector<Hit> *out) const;
-
- // Retrieves the value stored in the index-th special hit. If index is not
-  // less than kSpecialHitsSize / sizeof(Hit), returns an invalid hit.
- Hit get_special_hit(uint32_t index) const;
-
- // Sets the value stored in the index-th special hit to val. If index is not
-  // less than kSpecialHitsSize / sizeof(Hit), this has no effect.
- void set_special_hit(uint32_t index, const Hit &val);
-
- // Prepends hit to the memory region [offset - sizeof(Hit), offset] and
- // returns the new beginning of the padded region.
- //
-  // If offset - kSpecialHitsSize < sizeof(Hit/Hit::Value), then this function
- // has no effect.
- uint32_t PrependHitUncompressed(const Hit &hit, uint32_t offset);
-
- // Reads the score located at offset and returns it. Callers are responsible
- // for ensuring that the bytes starting at offset actually represent a score.
- //
-  // REQUIRES: offset + sizeof(Hit::Score) <= size_in_bytes_
-  // REQUIRES enforced by a fatal check.
- Hit::Score ReadScore(uint32_t offset) const;
-
- // A byte array of size size_in_bytes_ containing encoded hits for this
- // posting list.
- uint8_t *posting_list_buffer_; // does not own!
- uint32_t size_in_bytes_;
-};
-
-// Inlined functions. Implementation details below. Avert eyes!
-template <class T, Hit (*GetHit)(const T &)>
-uint32_t PostingListUsed::PrependHitArray(const T *array, uint32_t num_hits,
- bool keep_prepended) {
- if (!IsPostingListValid()) {
- return 0;
- }
-
- // Prepend hits working backwards from array[num_hits - 1].
- uint32_t i;
- for (i = 0; i < num_hits; ++i) {
- if (!PrependHit(GetHit(array[num_hits - i - 1])).ok()) {
- break;
- }
- }
- if (i != num_hits && !keep_prepended) {
- // Didn't fit. Undo everything and check that we have the same offset as
- // before. PopFrontHits guarantees that it will remove all 'i' hits so long
- // as there are at least 'i' hits in the posting list, which we know there
- // are.
- PopFrontHits(i);
- }
- return i;
-}
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_INDEX_POSTING_LIST_USED_H_
diff --git a/icing/index/posting-list-used_test.cc b/icing/index/posting-list-used_test.cc
deleted file mode 100644
index a0e9514..0000000
--- a/icing/index/posting-list-used_test.cc
+++ /dev/null
@@ -1,537 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/index/posting-list-used.h"
-
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <algorithm>
-#include <cstdint>
-#include <deque>
-#include <iterator>
-#include <memory>
-#include <random>
-#include <string>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "icing/index/posting-list-utils.h"
-#include "icing/legacy/index/icing-bit-util.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-id.h"
-#include "icing/testing/common-matchers.h"
-
-using std::min;
-using std::reverse;
-using std::vector;
-using testing::ElementsAre;
-using testing::ElementsAreArray;
-using testing::IsEmpty;
-
-namespace icing {
-namespace lib {
-
-struct HitElt {
- HitElt() = default;
- explicit HitElt(const Hit &hit_in) : hit(hit_in) {}
-
- static Hit get_hit(const HitElt &hit_elt) {
- return hit_elt.hit;
- }
-
- Hit hit;
-};
-
-// Produces a vector with num_hits HitElts. When delta-encoded, each hit should
-// be 1 byte with a 1-byte Hit::Score.
-std::vector<HitElt> CreateHits(DocumentId start_docid, int num_hits) {
- std::vector<HitElt> hits;
- hits.reserve(num_hits);
- while (num_hits--) {
- Hit::Score score = (start_docid % 7) + 1;
- SectionId section_id = (start_docid + 2) % (kMaxSectionId + 1);
- hits.emplace_back(Hit(section_id, start_docid, score));
- ++start_docid;
- }
- std::reverse(hits.begin(), hits.end());
- return hits;
-}
-
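-// Returns a hit whose delta from last_hit requires at least
-// desired_byte_length bytes when VarInt-encoded. It repeatedly decrements the
-// section id (rolling over to the next document id at kMinSectionId) until
-// the encoded delta is long enough.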
-Hit CreateHit(Hit last_hit, int desired_byte_length) {
- Hit hit =
- (last_hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score())
- : Hit(last_hit.section_id() - 1, last_hit.document_id(),
- last_hit.score());
- uint8_t buf[5];
- while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
- desired_byte_length) {
- hit = (hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score())
- : Hit(hit.section_id() - 1, hit.document_id(), hit.score());
- }
- return hit;
-}
-
-DocumentId InvertDocumentId(DocumentId document_id) {
- return kMaxDocumentId - document_id;
-}
-
-TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
- static const int kNumHits = 2551;
- static const size_t kHitsSize = kNumHits * sizeof(Hit);
-
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
-
- // Make used.
- Hit hit0(/*section_id=*/0, 0, /*score=*/56);
- pl_used.PrependHit(hit0);
- // Size = sizeof(uncompressed hit0)
- int expected_size = sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit0));
-
- Hit hit1(/*section_id=*/0, 1, Hit::kMaxHitScore);
- pl_used.PrependHit(hit1);
- // Size = sizeof(uncompressed hit1)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2 + sizeof(Hit::Score);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0));
-
- Hit hit2(/*section_id=*/0, 2, /*score=*/56);
- pl_used.PrependHit(hit2);
- // Size = sizeof(uncompressed hit2)
- // + sizeof(hit1-hit2)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2;
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit2, hit1, hit0));
-
- Hit hit3(/*section_id=*/0, 3, Hit::kMaxHitScore);
- pl_used.PrependHit(hit3);
- // Size = sizeof(uncompressed hit3)
- // + sizeof(hit2-hit3) + sizeof(hit2::score)
- // + sizeof(hit1-hit2)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2 + sizeof(Hit::Score);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit3, hit2, hit1, hit0));
-}
-
-TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
- constexpr int kHitsSize = 2 * posting_list_utils::min_posting_list_size();
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
-
- // Fill up the compressed region.
- // Transitions:
- // Adding hit0: EMPTY -> NOT_FULL
- // Adding hit1: NOT_FULL -> NOT_FULL
- // Adding hit2: NOT_FULL -> NOT_FULL
- Hit hit0(/*section_id=*/0, 0, Hit::kMaxHitScore);
- Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
- Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
- ICING_EXPECT_OK(pl_used.PrependHit(hit0));
- ICING_EXPECT_OK(pl_used.PrependHit(hit1));
- ICING_EXPECT_OK(pl_used.PrependHit(hit2));
- // Size used will be 2+2+4=8 bytes
- int expected_size = sizeof(Hit::Value) + 2 + 2;
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit2, hit1, hit0));
-
- // Add one more hit to transition NOT_FULL -> ALMOST_FULL
- Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3);
- ICING_EXPECT_OK(pl_used.PrependHit(hit3));
- // Compressed region would be 2+2+3+4=11 bytes, but the compressed region is
- // only 10 bytes. So instead, the posting list will transition to ALMOST_FULL.
- // The in-use compressed region will actually shrink from 8 bytes to 7 bytes
- // because the uncompressed version of hit2 will be overwritten with the
- // compressed delta of hit2. hit3 will be written to one of the special hits.
- // Because we're in ALMOST_FULL, the expected size is the size of the pl minus
- // the one hit used to mark the posting list as ALMOST_FULL.
- expected_size = kHitsSize - sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit3, hit2, hit1, hit0));
-
- // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
- Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
- ICING_EXPECT_OK(pl_used.PrependHit(hit4));
- // There are currently 7 bytes in use in the compressed region. hit3 will have
- // a 2-byte delta. That delta will fit in the compressed region (which will
- // now have 9 bytes in use), hit4 will be placed in one of the special hits
- // and the posting list will remain in ALMOST_FULL.
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit4, hit3, hit2, hit1, hit0));
-
- // Add one more hit to transition ALMOST_FULL -> FULL
- Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
- ICING_EXPECT_OK(pl_used.PrependHit(hit5));
- // There are currently 9 bytes in use in the compressed region. hit4 will have
- // a 2-byte delta which will not fit in the compressed region. So hit4 will
- // remain in one of the special hits and hit5 will occupy the other, making
- // the posting list FULL.
- EXPECT_LE(pl_used.BytesUsed(), kHitsSize);
- EXPECT_THAT(pl_used.GetHits(),
- ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0));
-
- // The posting list is FULL. Adding another hit should fail.
- Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1);
- EXPECT_THAT(pl_used.PrependHit(hit6),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
-}
-
-TEST(PostingListTest, PostingListUsedMinSize) {
- std::unique_ptr<char[]> hits_buf =
- std::make_unique<char[]>(posting_list_utils::min_posting_list_size());
-
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()),
- posting_list_utils::min_posting_list_size()));
- // PL State: EMPTY
- EXPECT_LE(pl_used.BytesUsed(), 0);
- EXPECT_THAT(pl_used.GetHits(), IsEmpty());
-
- // Add a hit, PL should shift to ALMOST_FULL state
- Hit hit0(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
- /*is_prefix_hit=*/true);
- ICING_EXPECT_OK(pl_used.PrependHit(hit0));
- // Size = sizeof(uncompressed hit0)
- int expected_size = sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit0));
-
- // Add the smallest hit possible - no score and a delta of 1. PL should shift
- // to FULL state.
- Hit hit1(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/true,
- /*is_prefix_hit=*/false);
- ICING_EXPECT_OK(pl_used.PrependHit(hit1));
- // Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
- expected_size += sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0));
-
- // Try to add the smallest hit possible. Should fail
- Hit hit2(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
- /*is_prefix_hit=*/false);
- EXPECT_THAT(pl_used.PrependHit(hit2),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0));
-}
-
-TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) {
- constexpr int kFinalSize = 1025;
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kFinalSize);
-
- // Min Size = 10
- int size = posting_list_utils::min_posting_list_size();
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), size));
-
- std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kMaxHitScore));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- std::reverse(hits_in.begin(), hits_in.end());
-
-  // Add five hits. The PL is in the empty state and an empty min size PL can
-  // only fit two hits, so PrependHitArray should report that only two fit.
- uint32_t num_can_prepend = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_can_prepend, 2);
-
- int can_fit_hits = num_can_prepend;
- // The PL has room for 2 hits. We should be able to add them without any
- // problem, transitioning the PL from EMPTY -> ALMOST_FULL -> FULL
- const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2);
- num_can_prepend = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- hits_in_ptr, can_fit_hits, false);
- EXPECT_EQ(num_can_prepend, can_fit_hits);
- EXPECT_EQ(size, pl_used.BytesUsed());
- std::deque<Hit> hits_pushed;
- std::transform(hits_in.rbegin(),
- hits_in.rend() - hits_in.size() + can_fit_hits,
- std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
-}
-
-TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
- // Size = 30
- int size = 3 * posting_list_utils::min_posting_list_size();
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(size);
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), size));
-
- std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kMaxHitScore));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- std::reverse(hits_in.begin(), hits_in.end());
- // The last hit is uncompressed and the four before it should only take one
- // byte. Total use = 8 bytes.
- // ----------------------
- // 29 delta(Hit #1)
- // 28 delta(Hit #2)
- // 27 delta(Hit #3)
- // 26 delta(Hit #4)
- // 25-22 Hit #5
- // 21-10 <unused>
- // 9-5 kSpecialHit
- // 4-0 Offset=22
- // ----------------------
- int byte_size = sizeof(Hit::Value) + hits_in.size() - 1;
-
- // Add five hits. The PL is in the empty state and should be able to fit all
- // five hits without issue, transitioning the PL from EMPTY -> NOT_FULL.
- uint32_t num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(byte_size, pl_used.BytesUsed());
- std::deque<Hit> hits_pushed;
- std::transform(hits_in.rbegin(), hits_in.rend(),
- std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
-
- Hit first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
- hits_in.clear();
- hits_in.emplace_back(first_hit);
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
- std::reverse(hits_in.begin(), hits_in.end());
- // Size increased by the deltas of these hits (1+2+1+2+3+2) = 11 bytes
- // ----------------------
- // 29 delta(Hit #1)
- // 28 delta(Hit #2)
- // 27 delta(Hit #3)
- // 26 delta(Hit #4)
- // 25 delta(Hit #5)
- // 24-23 delta(Hit #6)
- // 22 delta(Hit #7)
- // 21-20 delta(Hit #8)
- // 19-17 delta(Hit #9)
- // 16-15 delta(Hit #10)
- // 14-11 Hit #11
- // 10 <unused>
- // 9-5 kSpecialHit
- // 4-0 Offset=22
- // ----------------------
- byte_size += 11;
-
- // Add these 6 hits. The PL is currently in the NOT_FULL state and should
- // remain in the NOT_FULL state.
- num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(byte_size, pl_used.BytesUsed());
- // All hits from hits_in were added.
- std::transform(hits_in.rbegin(), hits_in.rend(),
- std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
-
- first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/3);
- hits_in.clear();
- hits_in.emplace_back(first_hit);
- // ----------------------
- // 29 delta(Hit #1)
- // 28 delta(Hit #2)
- // 27 delta(Hit #3)
- // 26 delta(Hit #4)
- // 25 delta(Hit #5)
- // 24-23 delta(Hit #6)
- // 22 delta(Hit #7)
- // 21-20 delta(Hit #8)
- // 19-17 delta(Hit #9)
- // 16-15 delta(Hit #10)
- // 14-12 delta(Hit #11)
- // 11-10 <unused>
- // 9-5 Hit #12
- // 4-0 kSpecialHit
- // ----------------------
- byte_size = 25;
-
- // Add this 1 hit. The PL is currently in the NOT_FULL state and should
- // transition to the ALMOST_FULL state - even though there is still some
- // unused space.
- num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(byte_size, pl_used.BytesUsed());
- // All hits from hits_in were added.
- std::transform(hits_in.rbegin(), hits_in.rend(),
- std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
-
- first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
- hits_in.clear();
- hits_in.emplace_back(first_hit);
- hits_in.emplace_back(
- CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
- std::reverse(hits_in.begin(), hits_in.end());
- // ----------------------
- // 29 delta(Hit #1)
- // 28 delta(Hit #2)
- // 27 delta(Hit #3)
- // 26 delta(Hit #4)
- // 25 delta(Hit #5)
- // 24-23 delta(Hit #6)
- // 22 delta(Hit #7)
- // 21-20 delta(Hit #8)
- // 19-17 delta(Hit #9)
- // 16-15 delta(Hit #10)
- // 14-12 delta(Hit #11)
- // 11 delta(Hit #12)
- // 10 <unused>
- // 9-5 Hit #13
- // 4-0 Hit #14
- // ----------------------
-
- // Add these 2 hits. The PL is currently in the ALMOST_FULL state. Adding the
- // first hit should keep the PL in ALMOST_FULL because the delta between Hit
- // #12 and Hit #13 (1 byte) can fit in the unused area (2 bytes). Adding the
-  // second hit should transition to the FULL state because the delta between
- // Hit #13 and Hit #14 (2 bytes) is larger than the remaining unused area
- // (1 byte).
- num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(size, pl_used.BytesUsed());
- // All hits from hits_in were added.
- std::transform(hits_in.rbegin(), hits_in.rend(),
- std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
-}
-
-TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) {
- static constexpr int kNumHits = 128;
- static constexpr int kDeltaSize = 1;
- static constexpr int kScoreSize = 1;
- static constexpr size_t kHitsSize =
- ((kNumHits * (kDeltaSize + kScoreSize)) / 5) * 5;
-
- std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
-
- // Create an array with one too many hits
- vector<HitElt> hits_in_too_many = CreateHits(0, kNumHits + 1);
- ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
- PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()),
- posting_list_utils::min_posting_list_size()));
-
- // PrependHitArray should fail because hits_in_too_many is far too large for
- // the minimum size pl.
- uint32_t num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in_too_many[0], hits_in_too_many.size(), false);
- ASSERT_LT(num_could_fit, hits_in_too_many.size());
- ASSERT_EQ(pl_used.BytesUsed(), 0);
- ASSERT_THAT(pl_used.GetHits(), testing::IsEmpty());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- pl_used, PostingListUsed::CreateFromUnitializedRegion(
- static_cast<void *>(hits_buf.get()), kHitsSize));
- // PrependHitArray should fail because hits_in_too_many is one hit too large
- // for this pl.
- num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in_too_many[0], hits_in_too_many.size(), false);
- ASSERT_LT(num_could_fit, hits_in_too_many.size());
- ASSERT_EQ(pl_used.BytesUsed(), 0);
- ASSERT_THAT(pl_used.GetHits(), testing::IsEmpty());
-}
-
-TEST(PostingListTest, PostingListStatusJumpFromNotFullToFullAndBack) {
- const uint32_t pl_size = 3 * sizeof(Hit);
- char hits_buf[pl_size];
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl,
- PostingListUsed::CreateFromUnitializedRegion(hits_buf, pl_size));
- ICING_ASSERT_OK(pl.PrependHit(Hit(Hit::kInvalidValue - 1, 0)));
- uint32_t bytes_used = pl.BytesUsed();
- // Status not full.
- CHECK_LE(bytes_used, pl_size - posting_list_utils::kSpecialHitsSize);
- ICING_ASSERT_OK(pl.PrependHit(Hit(Hit::kInvalidValue >> 2, 0)));
- // Status should jump to full directly.
- CHECK_EQ(pl.BytesUsed(), pl_size);
- pl.PopFrontHits(1);
- // Status should return to not full as before.
- CHECK_EQ(pl.BytesUsed(), bytes_used);
-}
-
-TEST(PostingListTest, DeltaOverflow) {
- char hits_buf[1000];
- ICING_ASSERT_OK_AND_ASSIGN(
- PostingListUsed pl,
- PostingListUsed::CreateFromUnitializedRegion(hits_buf, 4 * sizeof(Hit)));
-
- static const Hit::Value kOverflow[4] = {
- Hit::kInvalidValue >> 2,
- (Hit::kInvalidValue >> 2) * 2,
- (Hit::kInvalidValue >> 2) * 3,
- Hit::kInvalidValue - 1,
- };
-
- // Fit at least 4 ordinary values.
- for (Hit::Value v = 0; v < 4; v++) {
- ICING_EXPECT_OK(pl.PrependHit(Hit(4 - v)));
- }
-
- // Cannot fit 4 overflow values.
- ICING_ASSERT_OK_AND_ASSIGN(pl, PostingListUsed::CreateFromUnitializedRegion(
- hits_buf, 4 * sizeof(Hit)));
- ICING_EXPECT_OK(pl.PrependHit(Hit(kOverflow[3])));
- ICING_EXPECT_OK(pl.PrependHit(Hit(kOverflow[2])));
-
- // Can fit only one more.
- ICING_EXPECT_OK(pl.PrependHit(Hit(kOverflow[1])));
- EXPECT_THAT(pl.PrependHit(Hit(kOverflow[0])),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/index/property-existence-indexing-handler.cc b/icing/index/property-existence-indexing-handler.cc
new file mode 100644
index 0000000..504f380
--- /dev/null
+++ b/icing/index/property-existence-indexing-handler.cc
@@ -0,0 +1,127 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/property-existence-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
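+// Recursively walks `document` and inserts one meta token into `meta_tokens`
+// for every property path that has at least one value set. For example
+// (illustrative), a document with a non-empty string property "name" and a
+// document property "value" whose nested document sets "body" yields:
+//   kPropertyExistenceTokenPrefix + "name"
+//   kPropertyExistenceTokenPrefix + "value"
+//   kPropertyExistenceTokenPrefix + "value.body"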
+void ConstructPropertyExistenceMetaToken(
+ const std::string& current_path, const DocumentProto& document,
+ std::unordered_set<std::string>& meta_tokens) {
+ for (const PropertyProto& property : document.properties()) {
+ std::string new_path = current_path;
+ if (!new_path.empty()) {
+ new_path.append(".");
+ }
+ new_path.append(property.name());
+ for (const DocumentProto& nested_document : property.document_values()) {
+ ConstructPropertyExistenceMetaToken(new_path, nested_document,
+ meta_tokens);
+ }
+ // A string property exists if and only if there is at least one non-empty
+ // string in the property.
+ bool has_string_value = false;
+ for (const std::string& string_value : property.string_values()) {
+ if (!string_value.empty()) {
+ has_string_value = true;
+ break;
+ }
+ }
+ if (has_string_value || property.int64_values_size() > 0 ||
+ property.double_values_size() > 0 ||
+ property.boolean_values_size() > 0 ||
+ property.bytes_values_size() > 0 ||
+ property.document_values_size() > 0) {
+ meta_tokens.insert(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, new_path));
+ }
+ }
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyExistenceIndexingHandler>>
+PropertyExistenceIndexingHandler::Create(const Clock* clock, Index* index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ return std::unique_ptr<PropertyExistenceIndexingHandler>(
+ new PropertyExistenceIndexingHandler(*clock, index));
+}
+
+libtextclassifier3::Status PropertyExistenceIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ libtextclassifier3::Status status;
+  // Section id is irrelevant to the metadata tokens that are used to support
+  // property existence checks.
+ Index::Editor editor =
+ index_.Edit(document_id, /*section_id=*/0, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ std::unordered_set<std::string> meta_tokens;
+ ConstructPropertyExistenceMetaToken(
+ /*current_path=*/"", tokenized_document.document(), meta_tokens);
+ for (const std::string& meta_token : meta_tokens) {
+ status = editor.BufferTerm(meta_token.c_str());
+ if (!status.ok()) {
+ // We've encountered a failure. Bail out. We'll mark this doc as deleted
+ // and signal a failure to the client.
+ ICING_LOG(WARNING) << "Failed to buffer term in lite lexicon due to: "
+ << status.error_message();
+ break;
+ }
+ }
+
+ if (status.ok()) {
+ // Add all the metadata tokens to support property existence check.
+ status = editor.IndexAllBufferedTerms();
+ if (!status.ok()) {
+ ICING_LOG(WARNING) << "Failed to add hits in lite index due to: "
+ << status.error_message();
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_metadata_term_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ put_document_stats->mutable_tokenization_stats()
+ ->set_num_metadata_tokens_indexed(meta_tokens.size());
+ }
+
+ return status;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/property-existence-indexing-handler.h b/icing/index/property-existence-indexing-handler.h
new file mode 100644
index 0000000..55c0bb4
--- /dev/null
+++ b/icing/index/property-existence-indexing-handler.h
@@ -0,0 +1,86 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_PROPERTY_EXISTENCE_INDEXING_HANDLER_H_
+#define ICING_INDEX_PROPERTY_EXISTENCE_INDEXING_HANDLER_H_
+
+#include <memory>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
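+// Prefix prepended to a property path to form the metadata token that records
+// the property's existence (e.g. kPropertyExistenceTokenPrefix + "propA").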
+inline constexpr std::string_view kPropertyExistenceTokenPrefix =
+ "\xFF_HAS_\xFF";
+
+// This class is meant to be owned by TermIndexingHandler. Instead of using this
+// handler directly, callers should use TermIndexingHandler to index documents.
+//
+// This handler will not check or set last_added_document_id of the index, and
+// it will not merge or sort the lite index either.
+class PropertyExistenceIndexingHandler {
+ public:
+ // Creates a PropertyExistenceIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created PropertyExistenceIndexingHandler instance.
+ //
+ // Returns:
+ // - A PropertyExistenceIndexingHandler instance on success
+  //   - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyExistenceIndexingHandler>>
+ Create(const Clock* clock, Index* index);
+
+ ~PropertyExistenceIndexingHandler() = default;
+
+ // Handles the property existence indexing process: add hits for metadata
+ // tokens used to index property existence.
+ //
+  // For example, if the passed-in document has string properties "propA",
+  // "propB" and "propC.propD", and document property "propC", this handler
+  // will add the following metadata tokens to the index:
+ // - kPropertyExistenceTokenPrefix + "propA"
+ // - kPropertyExistenceTokenPrefix + "propB"
+ // - kPropertyExistenceTokenPrefix + "propC"
+ // - kPropertyExistenceTokenPrefix + "propC.propD"
+ //
+  // Returns:
+ // - OK on success
+  //  - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add any more
+  //    content.
+ // - INTERNAL_ERROR if any other errors occur.
+ libtextclassifier3::Status Handle(const TokenizedDocument& tokenized_document,
+ DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats);
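+  //
+  // A minimal usage sketch (assuming a valid Clock, Index, TokenizedDocument
+  // and DocumentId are already available):
+  //
+  //   ICING_ASSIGN_OR_RETURN(
+  //       std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+  //       PropertyExistenceIndexingHandler::Create(&clock, index));
+  //   ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
+  //                                         /*put_document_stats=*/nullptr));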
+
+ private:
+ explicit PropertyExistenceIndexingHandler(const Clock& clock, Index* index)
+ : clock_(clock), index_(*index) {}
+
+ const Clock& clock_; // Does not own.
+ Index& index_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_PROPERTY_EXISTENCE_INDEXING_HANDLER_H_
diff --git a/icing/index/property-existence-indexing-handler_test.cc b/icing/index/property-existence-indexing-handler_test.cc
new file mode 100644
index 0000000..e42fbc3
--- /dev/null
+++ b/icing/index/property-existence-indexing-handler_test.cc
@@ -0,0 +1,524 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/property-existence-indexing-handler.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsTrue;
+using ::testing::Test;
+
+static constexpr std::string_view kTreeType = "TreeNode";
+static constexpr std::string_view kPropertyName = "name";
+static constexpr std::string_view kPropertyValue = "value";
+static constexpr std::string_view kPropertySubtrees = "subtrees";
+
+static constexpr std::string_view kValueType = "Value";
+static constexpr std::string_view kPropertyBody = "body";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+static constexpr std::string_view kPropertyScore = "score";
+
+class PropertyExistenceIndexingHandlerTest : public Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTreeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyValue)
+ .SetDataTypeDocument(
+ kValueType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertySubtrees)
+ .SetDataTypeDocument(
+ kTreeType, /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kValueType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyScore)
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string index_dir_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryExistence(Index* index, std::string_view property_path) {
+ return index->GetIterator(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, property_path),
+ /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY,
+ /*need_hit_term_frequency=*/false);
+}
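+
+// For example, QueryExistence(index, "subtrees.value") queries the index for
+// the exact term StrCat(kPropertyExistenceTokenPrefix, "subtrees.value").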
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
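+
+// Note: the iterator returns hits in descending DocumentId order, so the
+// expectations below list the most recently added documents first.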
+
+TEST_F(PropertyExistenceIndexingHandlerTest, HandlePropertyExistence) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Create a document with every property.
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build();
+ // Create a document with missing body.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(std::string(kValueType))
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build();
+ // Create a document with missing timestamp.
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+ PropertyExistenceIndexingHandler::Create(&fake_clock_, index.get()));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Get all documents that have "body".
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ QueryExistence(index.get(), kPropertyBody));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id0, std::vector<SectionId>{0})));
+
+ // Get all documents that have "timestamp".
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ QueryExistence(index.get(), kPropertyTimestamp));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id1, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id0, std::vector<SectionId>{0})));
+
+ // Get all documents that have "score".
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), kPropertyScore));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id1, std::vector<SectionId>{0}),
+ EqualsDocHitInfo(document_id0, std::vector<SectionId>{0})));
+}
+
+TEST_F(PropertyExistenceIndexingHandlerTest, HandleNestedPropertyExistence) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Create a complex nested root_document with the following property paths.
+ // - name
+ // - subtrees
+ // - subtrees.name
+ // - subtrees.value
+ // - subtrees.value.timestamp
+ // - subtrees.subtrees
+ // - subtrees.subtrees.name
+ // - subtrees.subtrees.value
+ // - subtrees.subtrees.value.body
+ // - subtrees.subtrees.value.score
+ DocumentProto leaf_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "leaf")
+ .AddDocumentProperty(
+ std::string(kPropertyValue),
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .AddDoubleProperty(std::string(kPropertyScore), 456.789)
+ .Build())
+ .Build();
+ DocumentProto intermediate_document1 =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "intermediate1")
+ .AddDocumentProperty(
+ std::string(kPropertyValue),
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kValueType))
+ .AddInt64Property(std::string(kPropertyTimestamp), 123)
+ .Build())
+ .AddDocumentProperty(std::string(kPropertySubtrees), leaf_document)
+ .Build();
+ DocumentProto intermediate_document2 =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "intermediate2")
+ .Build();
+ DocumentProto root_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri")
+ .SetSchema(std::string(kTreeType))
+ .AddStringProperty(std::string(kPropertyName), "root")
+ .AddDocumentProperty(std::string(kPropertySubtrees),
+ intermediate_document1, intermediate_document2)
+ .Build();
+
+ // Handle root_document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_root_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(root_document)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_root_document.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+ PropertyExistenceIndexingHandler::Create(&fake_clock_, index.get()));
+ EXPECT_THAT(handler->Handle(tokenized_root_document, document_id,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Check that the above property paths can be found by query.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ QueryExistence(index.get(), "name"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), "subtrees"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), "subtrees.name"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ QueryExistence(index.get(), "subtrees.value"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.value.timestamp"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ QueryExistence(index.get(), "subtrees.subtrees"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.name"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.value"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.value.body"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, QueryExistence(index.get(), "subtrees.subtrees.value.score"));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+}
+
+TEST_F(PropertyExistenceIndexingHandlerTest, SingleEmptyStringIsNonExisting) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Create a document with one empty body.
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "uri0")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "")
+ .Build();
+  // Create a document with two empty bodies.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "", "")
+ .Build();
+ // Create a document with one non-empty body.
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(std::string(kValueType))
+ .AddStringProperty(std::string(kPropertyBody), "foo")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PropertyExistenceIndexingHandler> handler,
+ PropertyExistenceIndexingHandler::Create(&fake_clock_, index.get()));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+  // Check that the documents that have one or two empty bodies are not
+  // considered to have a body property.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ QueryExistence(index.get(), kPropertyBody));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{0})));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
new file mode 100644
index 0000000..8b20d04
--- /dev/null
+++ b/icing/index/string-section-indexing-handler.cc
@@ -0,0 +1,114 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/string-section-indexing-handler.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<StringSectionIndexingHandler>>
+StringSectionIndexingHandler::Create(const Normalizer* normalizer,
+ Index* index) {
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ return std::unique_ptr<StringSectionIndexingHandler>(
+ new StringSectionIndexingHandler(normalizer, index));
+}
+
+libtextclassifier3::Status StringSectionIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats) {
+ uint32_t num_tokens = 0;
+ libtextclassifier3::Status status;
+ for (const TokenizedSection& section :
+ tokenized_document.tokenized_string_sections()) {
+ if (section.metadata.tokenizer ==
+ StringIndexingConfig::TokenizerType::NONE) {
+ ICING_LOG(WARNING)
+ << "Unexpected TokenizerType::NONE found when indexing document.";
+ }
+ // TODO(b/152934343): pass real namespace ids in
+ Index::Editor editor =
+ index_.Edit(document_id, section.metadata.id,
+ section.metadata.term_match_type, /*namespace_id=*/0);
+ for (std::string_view token : section.token_sequence) {
+ ++num_tokens;
+
+ switch (section.metadata.tokenizer) {
+ case StringIndexingConfig::TokenizerType::VERBATIM:
+ // data() is safe to use here because a token created from the
+ // VERBATIM tokenizer is the entire string value. The character at
+ // data() + token.length() is guaranteed to be a null char.
+ status = editor.BufferTerm(token.data());
+ break;
+ case StringIndexingConfig::TokenizerType::NONE:
+ [[fallthrough]];
+ case StringIndexingConfig::TokenizerType::RFC822:
+ [[fallthrough]];
+ case StringIndexingConfig::TokenizerType::URL:
+ [[fallthrough]];
+ case StringIndexingConfig::TokenizerType::PLAIN:
+ std::string normalized_term = normalizer_.NormalizeTerm(token);
+ status = editor.BufferTerm(normalized_term.c_str());
+ }
+
+ if (!status.ok()) {
+ // We've encountered a failure. Bail out. We'll mark this doc as deleted
+ // and signal a failure to the client.
+ ICING_LOG(WARNING) << "Failed to buffer term in lite lexicon due to: "
+ << status.error_message();
+ break;
+ }
+ }
+ if (!status.ok()) {
+ break;
+ }
+ // Add all the seen terms to the index with their term frequency.
+ status = editor.IndexAllBufferedTerms();
+ if (!status.ok()) {
+ ICING_LOG(WARNING) << "Failed to add hits in lite index due to: "
+ << status.error_message();
+ break;
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
+ num_tokens);
+ }
+
+ return status;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/string-section-indexing-handler.h b/icing/index/string-section-indexing-handler.h
new file mode 100644
index 0000000..8452e9f
--- /dev/null
+++ b/icing/index/string-section-indexing-handler.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
+#define ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
+
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+// This class is meant to be owned by TermIndexingHandler. Instead of using this
+// handler directly, callers should use TermIndexingHandler to index documents.
+//
+// This handler will not check or set last_added_document_id of the index, and
+// it will not merge or sort the lite index either.
+class StringSectionIndexingHandler {
+ public:
+ // Creates a StringSectionIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created StringSectionIndexingHandler instance.
+ //
+ // Returns:
+ // - A StringSectionIndexingHandler instance on success
+  //   - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<StringSectionIndexingHandler>>
+ Create(const Normalizer* normalizer, Index* index);
+
+ ~StringSectionIndexingHandler() = default;
+
+  // Handles the string term indexing process: adds hits into the lite index
+  // for all contents in tokenized_document.tokenized_string_sections. Sorting
+  // and merging of the lite index are left to the owning TermIndexingHandler.
+  //
+  // Returns:
+  //   - OK on success
+  //   - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add any more
+  //     content.
+ // - INTERNAL_ERROR if any other errors occur.
+ // - Any main/lite index errors.
+ libtextclassifier3::Status Handle(const TokenizedDocument& tokenized_document,
+ DocumentId document_id,
+ PutDocumentStatsProto* put_document_stats);
+
+ private:
+ explicit StringSectionIndexingHandler(const Normalizer* normalizer,
+ Index* index)
+ : normalizer_(*normalizer), index_(*index) {}
+
+ const Normalizer& normalizer_; // Does not own.
+ Index& index_; // Does not own.
+};
+
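+// Usage sketch (illustrative only; `normalizer` and `index` stand for any
+// valid instances that outlive the handler):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<StringSectionIndexingHandler> handler,
+//       StringSectionIndexingHandler::Create(normalizer, index));
+//   ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
+//                                         /*put_document_stats=*/nullptr));
+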
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
diff --git a/icing/index/term-indexing-handler.cc b/icing/index/term-indexing-handler.cc
new file mode 100644
index 0000000..7eb9dda
--- /dev/null
+++ b/icing/index/term-indexing-handler.cc
@@ -0,0 +1,146 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/term-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/index.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/index/string-section-indexing-handler.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<TermIndexingHandler>>
+TermIndexingHandler::Create(const Clock* clock, const Normalizer* normalizer,
+ Index* index,
+ bool build_property_existence_metadata_hits) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ // Property existence index handler
+ std::unique_ptr<PropertyExistenceIndexingHandler>
+ property_existence_indexing_handler = nullptr;
+ if (build_property_existence_metadata_hits) {
+ ICING_ASSIGN_OR_RETURN(
+ property_existence_indexing_handler,
+ PropertyExistenceIndexingHandler::Create(clock, index));
+ }
+ // String section index handler
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(normalizer, index));
+
+ return std::unique_ptr<TermIndexingHandler>(new TermIndexingHandler(
+ clock, index, std::move(property_existence_indexing_handler),
+ std::move(string_section_indexing_handler)));
+}
+
+libtextclassifier3::Status TermIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ if (index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, index_.last_added_document_id()));
+ }
+ index_.set_last_added_document_id(document_id);
+
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ if (property_existence_indexing_handler_ != nullptr) {
+ status = property_existence_indexing_handler_->Handle(
+ tokenized_document, document_id, put_document_stats);
+ }
+ if (status.ok()) {
+ status = string_section_indexing_handler_->Handle(
+ tokenized_document, document_id, put_document_stats);
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_term_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
+  // Merge if the index wants a merge and indexing either succeeded or failed
+  // with RESOURCE_EXHAUSTED.
+ bool should_merge =
+ (status.ok() || absl_ports::IsResourceExhausted(status)) &&
+ index_.WantsMerge();
+
+ // Check and sort the LiteIndex HitBuffer if we don't need to merge.
+ if (!should_merge && index_.LiteIndexNeedSort()) {
+ std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
+ index_.SortLiteIndex();
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_lite_index_sort_latency_ms(
+ sort_timer->GetElapsedMilliseconds());
+ }
+ }
+
+ // Attempt index merge if needed.
+ if (should_merge) {
+ ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
+
+ std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
+ libtextclassifier3::Status merge_status = index_.Merge();
+
+ if (!merge_status.ok()) {
+ ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
+ if (!index_.Reset().ok()) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to reset to clear index after merge failure. Merge "
+ "failure=%d:%s",
+ merge_status.error_code(), merge_status.error_message().c_str()));
+ } else {
+ return absl_ports::DataLossError(IcingStringUtil::StringPrintf(
+ "Forced to reset index after merge failure. Merge failure=%d:%s",
+ merge_status.error_code(), merge_status.error_message().c_str()));
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_index_merge_latency_ms(
+ merge_timer->GetElapsedMilliseconds());
+ }
+ }
+ return status;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/term-indexing-handler.h b/icing/index/term-indexing-handler.h
new file mode 100644
index 0000000..c055bbf
--- /dev/null
+++ b/icing/index/term-indexing-handler.h
@@ -0,0 +1,97 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_TERM_INDEXING_HANDLER_H_
+#define ICING_INDEX_TERM_INDEXING_HANDLER_H_
+
+#include <memory>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/index/index.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/index/string-section-indexing-handler.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+class TermIndexingHandler : public DataIndexingHandler {
+ public:
+ // Creates a TermIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created TermIndexingHandler instance.
+ //
+ // Returns:
+ // - A TermIndexingHandler instance on success
+  //   - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<std::unique_ptr<TermIndexingHandler>>
+ Create(const Clock* clock, const Normalizer* normalizer, Index* index,
+ bool build_property_existence_metadata_hits);
+
+ ~TermIndexingHandler() override = default;
+
+ // Handles term indexing process:
+ // - Checks if document_id > last_added_document_id.
+ // - Updates last_added_document_id to document_id.
+  //   - Runs PropertyExistenceIndexingHandler (if enabled).
+  //   - Runs StringSectionIndexingHandler.
+ // - Sorts the lite index if necessary.
+ // - Merges the lite index into the main index if necessary.
+ //
+  // Returns:
+  //   - OK on success
+  //   - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the
+  //     document_id of a previously indexed document in non-recovery mode.
+  //   - RESOURCE_EXHAUSTED_ERROR if the index is full and can't add any more
+  //     content.
+ // - DATA_LOSS_ERROR if an attempt to merge the index fails and both indices
+ // are cleared as a result.
+ // - INTERNAL_ERROR if any other errors occur.
+ // - Any main/lite index errors.
+ libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+
+ private:
+ explicit TermIndexingHandler(const Clock* clock, Index* index,
+ std::unique_ptr<PropertyExistenceIndexingHandler>
+ property_existence_indexing_handler,
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler)
+ : DataIndexingHandler(clock),
+ index_(*index),
+ property_existence_indexing_handler_(
+ std::move(property_existence_indexing_handler)),
+ string_section_indexing_handler_(
+ std::move(string_section_indexing_handler)) {}
+
+ Index& index_; // Does not own.
+
+ std::unique_ptr<PropertyExistenceIndexingHandler>
+ property_existence_indexing_handler_; // Nullable
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler_;
+};
+
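+// Usage sketch (illustrative; mirrors the unit tests):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<TermIndexingHandler> handler,
+//       TermIndexingHandler::Create(
+//           clock, normalizer, index,
+//           /*build_property_existence_metadata_hits=*/true));
+//   ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
+//                                         /*recovery_mode=*/false,
+//                                         /*put_document_stats=*/nullptr));
+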
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_TERM_INDEXING_HANDLER_H_
diff --git a/icing/index/term-indexing-handler_test.cc b/icing/index/term-indexing-handler_test.cc
new file mode 100644
index 0000000..1b03865
--- /dev/null
+++ b/icing/index/term-indexing-handler_test.cc
@@ -0,0 +1,664 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/term-indexing-handler.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Test;
+
+// Schema type with indexable properties and section Id.
+// Section Id is determined by the lexicographical order of indexable property
+// paths.
+// Section id = 0: body
+// Section id = 1: title
+constexpr std::string_view kFakeType = "FakeType";
+constexpr std::string_view kPropertyBody = "body";
+constexpr std::string_view kPropertyTitle = "title";
+
+constexpr SectionId kSectionIdBody = 0;
+constexpr SectionId kSectionIdTitle = 1;
+
+// Schema type with nested indexable properties and section Id.
+// Section id = 0: "name"
+// Section id = 1: "nested.body"
+// Section id = 3: "nested.title"
+// Section id = 4: "subject"
+constexpr std::string_view kNestedType = "NestedType";
+constexpr std::string_view kPropertyName = "name";
+constexpr std::string_view kPropertyNestedDoc = "nested";
+constexpr std::string_view kPropertySubject = "subject";
+
+constexpr SectionId kSectionIdNestedBody = 1;
+
+class TermIndexingHandlerTest : public Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ document_store_dir_ = base_dir_ + "/document_store";
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTitle)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyBody)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult doc_store_create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(doc_store_create_result.document_store);
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string index_dir_;
+ std::string schema_store_dir_;
+ std::string document_store_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryExistence(Index* index, std::string_view property_path) {
+ return index->GetIterator(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, property_path),
+ /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY,
+ /*need_hit_term_frequency=*/false);
+}
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
+ std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfoTermFrequencyPair> infos;
+ while (iterator->Advance().ok()) {
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ for (const TermMatchInfo& term_match_info : matched_terms_stats) {
+ infos.push_back(DocHitInfoTermFrequencyPair(
+ iterator->doc_hit_info(), term_match_info.term_frequencies));
+ }
+ }
+ return infos;
+}
+
+TEST_F(TermIndexingHandlerTest, HandleBothStringSectionAndPropertyExistence) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo")
+ .AddStringProperty(std::string(kPropertyBody), "")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id));
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kSectionIdTitle, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ document_id, expected_map)));
+
+ // Query for "title" property existence.
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), kPropertyTitle));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(document_id, std::vector<SectionId>{0})));
+
+ // Query for "body" property existence.
+ ICING_ASSERT_OK_AND_ASSIGN(itr, QueryExistence(index.get(), kPropertyBody));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(TermIndexingHandlerTest,
+ HandleIntoLiteIndex_sortInIndexingNotTriggered) {
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(tokenized_document.document()));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id));
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfoTermFrequencyPair> hits =
+ GetHitsWithTermFrequency(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expected_map{
+ {kSectionIdTitle, 1}, {kSectionIdBody, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ document_id, expected_map)));
+
+ // Query 'foo' with sectionId mask that masks all results
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, 1U << 2,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(TermIndexingHandlerTest, HandleIntoLiteIndex_sortInIndexingTriggered) {
+  // Create the LiteIndex with a smaller sort threshold. At 64 bytes we sort
+  // the HitBuffer after inserting 8 hits.
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo foo foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "bar baz baz")
+ .AddStringProperty(std::string(kPropertyBody), "foo foo baz")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/0")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(std::string(kPropertyNestedDoc), document1)
+ .AddStringProperty(std::string(kPropertyName), "qux")
+ .AddStringProperty(std::string(kPropertySubject), "bar bar")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
+
+  // Handle doc0 and doc1. The LiteIndex should sort its HitBuffer after
+  // adding these docs.
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id1));
+ EXPECT_THAT(index->LiteIndexNeedSort(), IsFalse());
+
+  // Handle doc2. The LiteIndex should have an unsorted portion after adding
+  // this doc.
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id2));
+
+ // Hits in the hit buffer:
+ // <term>: {(docId, sectionId, term_freq)...}
+ // foo: {(0, kSectionIdTitle, 3); (0, kSectionIdBody, 1);
+ // (1, kSectionIdBody, 2);
+ // (2, kSectionIdNestedBody, 2)}
+ // bar: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 1);
+ // (2, kSectionIdNestedTitle, 1); (2, kSectionIdSubject, 2)}
+ // baz: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 2); (1, kSectionIdBody, 1),
+ // (2, kSectionIdNestedTitle, 2); (2, kSectionIdNestedBody, 1)}
+ // qux: {(2, kSectionIdName, 1)}
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Advance the iterator and verify that we're returning hits in the correct
+ // order (i.e. in descending order of DocId)
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(2));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdNestedBody));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{kSectionIdNestedBody, 2}};
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map2)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{kSectionIdBody, 2}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdTitle | 1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{kSectionIdTitle, 3},
+ {kSectionIdBody, 1}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map0)));
+}
+
+TEST_F(TermIndexingHandlerTest, HandleIntoLiteIndex_enableSortInIndexing) {
+  // Create the LiteIndex with a 64-byte sort threshold (8 hits), but with
+  // sort-at-indexing initially disabled.
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/false,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ DocumentProto document0 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "foo foo foo")
+ .AddStringProperty(std::string(kPropertyBody), "foo bar baz")
+ .Build();
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyTitle), "bar baz baz")
+ .AddStringProperty(std::string(kPropertyBody), "foo foo baz")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("icing", "nested_type/0")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(std::string(kPropertyNestedDoc), document1)
+ .AddStringProperty(std::string(kPropertyName), "qux")
+ .AddStringProperty(std::string(kPropertySubject), "bar bar")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document0,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document0)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id0,
+ document_store_->Put(tokenized_document0.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document1,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(tokenized_document1.document()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document2,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(tokenized_document2.document()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIndexingHandler> handler,
+ TermIndexingHandler::Create(
+ &fake_clock_, normalizer_.get(), index.get(),
+ /*build_property_existence_metadata_hits=*/true));
+
+ // Handle all docs
+ EXPECT_THAT(handler->Handle(tokenized_document0, document_id0,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document1, document_id1,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(handler->Handle(tokenized_document2, document_id2,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(document_id2));
+
+ // We've disabled sorting during indexing so the HitBuffer's unsorted section
+ // should exceed the sort threshold. PersistToDisk and reinitialize the
+ // LiteIndex with sort_at_indexing=true.
+ ASSERT_THAT(index->PersistToDisk(), IsOk());
+ options = Index::Options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Verify that the HitBuffer has been sorted after initializing with
+ // sort_at_indexing enabled.
+ EXPECT_THAT(index->LiteIndexNeedSort(), IsFalse());
+
+ // Hits in the hit buffer:
+ // <term>: {(docId, sectionId, term_freq)...}
+ // foo: {(0, kSectionIdTitle, 3); (0, kSectionIdBody, 1);
+ // (1, kSectionIdBody, 2);
+ // (2, kSectionIdNestedBody, 2)}
+ // bar: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 1);
+ // (2, kSectionIdNestedTitle, 1); (2, kSectionIdSubject, 2)}
+ // baz: {(0, kSectionIdBody, 1);
+ // (1, kSectionIdTitle, 2); (1, kSectionIdBody, 1),
+ // (2, kSectionIdNestedTitle, 2); (2, kSectionIdNestedBody, 1)}
+ // qux: {(2, kSectionIdName, 1)}
+
+ // Query 'foo'
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index->GetIterator("foo", /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+
+ // Advance the iterator and verify that we're returning hits in the correct
+ // order (i.e. in descending order of DocId)
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(2));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdNestedBody));
+ std::vector<TermMatchInfo> matched_terms_stats;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map2 = {{kSectionIdNestedBody, 2}};
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map2)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{kSectionIdBody, 2}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(itr->doc_hit_info().hit_section_ids_mask(),
+ Eq(1U << kSectionIdTitle | 1U << kSectionIdBody));
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{kSectionIdTitle, 3},
+ {kSectionIdBody, 1}};
+ matched_terms_stats.clear();
+ itr->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map0)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/term-metadata.h b/icing/index/term-metadata.h
index c1c1564..09c59ae 100644
--- a/icing/index/term-metadata.h
+++ b/icing/index/term-metadata.h
@@ -22,14 +22,17 @@ namespace lib {
// A POD struct storing metadata of a term.
struct TermMetadata {
- TermMetadata(std::string content_in, int hit_count_in)
- : content(std::move(content_in)), hit_count(hit_count_in) {}
+ TermMetadata(std::string content_in, int score_in)
+ : content(std::move(content_in)), score(score_in) {}
// Content of the term.
std::string content;
- // Number of document hits associated with the term.
- int hit_count;
+ // The score of the term.
+ // It will either be:
+  // - HIT_COUNT: the number of document+section hits associated with the term
+  // - TERM_FREQUENCY: the number of times that the term appears in documents
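+  // For example (illustrative): TermMetadata("foo", /*score_in=*/3) means
+  // "foo" has 3 document+section hits under HIT_COUNT, or appears 3 times in
+  // documents under TERM_FREQUENCY.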
+ int score;
};
} // namespace lib
diff --git a/icing/jni.lds b/icing/jni.lds
new file mode 100644
index 0000000..64fae36
--- /dev/null
+++ b/icing/jni.lds
@@ -0,0 +1,9 @@
+VERS_1.0 {
+ # Export JNI symbols.
+ global:
+ JNI_OnLoad;
+
+ # Hide everything else
+ local:
+ *;
+};
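+
+# Illustrative note (build-setup assumption): a version script like this is
+# typically applied with a linker flag such as
+#   -Wl,--version-script=icing/jni.lds
+# so that the resulting shared library exports only JNI_OnLoad.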
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index b1b5420..a0883fa 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -15,11 +15,12 @@
#include <jni.h>
#include <string>
+#include <utility>
-#include "icing/jni/jni-cache.h"
-#include <google/protobuf/message_lite.h>
-#include "icing/absl_ports/status_imports.h"
#include "icing/icing-search-engine.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/jni/scoped-primitive-array-critical.h"
+#include "icing/jni/scoped-utf-chars.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
#include "icing/proto/optimize.pb.h"
@@ -27,38 +28,43 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/message_lite.h>
namespace {
+
bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
google::protobuf::MessageLite* protobuf) {
- int bytes_size = env->GetArrayLength(bytes);
- uint8_t* bytes_ptr = static_cast<uint8_t*>(
- env->GetPrimitiveArrayCritical(bytes, /*isCopy=*/nullptr));
- bool parsed = protobuf->ParseFromArray(bytes_ptr, bytes_size);
- env->ReleasePrimitiveArrayCritical(bytes, bytes_ptr, /*mode=*/0);
-
- return parsed;
+ icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, bytes);
+ return protobuf->ParseFromArray(scoped_array.data(), scoped_array.size());
}
-jbyteArray SerializeProtoToJniByteArray(
- JNIEnv* env, const google::protobuf::MessageLite& protobuf) {
+jbyteArray SerializeProtoToJniByteArray(JNIEnv* env,
+ const google::protobuf::MessageLite& protobuf) {
int size = protobuf.ByteSizeLong();
jbyteArray ret = env->NewByteArray(size);
if (ret == nullptr) {
- ICING_LOG(ERROR) << "Failed to allocated bytes for jni protobuf";
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to allocated bytes for jni protobuf";
return nullptr;
}
- uint8_t* ret_buf = static_cast<uint8_t*>(
- env->GetPrimitiveArrayCritical(ret, /*isCopy=*/nullptr));
- protobuf.SerializeWithCachedSizesToArray(ret_buf);
- env->ReleasePrimitiveArrayCritical(ret, ret_buf, 0);
+ icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped_array(env, ret);
+ protobuf.SerializeWithCachedSizesToArray(scoped_array.data());
return ret;
}
-icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(
- jlong native_pointer) {
+struct {
+ jfieldID native_pointer;
+} JavaIcingSearchEngineImpl;
+
+icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
+ jobject object) {
+ jlong native_pointer =
+ env->GetLongField(object, JavaIcingSearchEngineImpl.native_pointer);
return reinterpret_cast<icing::lib::IcingSearchEngine*>(native_pointer);
}
@@ -66,23 +72,12 @@ icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(
extern "C" {
-jint JNI_OnLoad(JavaVM* vm, void* reserved) {
- JNIEnv* env;
- if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
- ICING_LOG(ERROR) << "ERROR: GetEnv failed";
- return JNI_ERR;
- }
-
- return JNI_VERSION_1_6;
-}
-
-JNIEXPORT jlong JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeCreate(
- JNIEnv* env, jclass clazz, jbyteArray icing_search_engine_options_bytes) {
+jlong nativeCreate(JNIEnv* env, jclass clazz,
+ jbyteArray icing_search_engine_options_bytes) {
icing::lib::IcingSearchEngineOptions options;
if (!ParseProtoFromJniByteArray(env, icing_search_engine_options_bytes,
&options)) {
- ICING_LOG(ERROR)
+ ICING_LOG(icing::lib::ERROR)
<< "Failed to parse IcingSearchEngineOptions in nativeCreate";
return 0;
}
@@ -96,11 +91,15 @@ Java_com_google_android_icing_IcingSearchEngine_nativeCreate(
return reinterpret_cast<jlong>(icing);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+void nativeDestroy(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
+ delete icing;
+}
+
+jbyteArray nativeInitialize(JNIEnv* env, jclass clazz, jobject object) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
icing::lib::InitializeResultProto initialize_result_proto =
icing->Initialize();
@@ -108,16 +107,16 @@ Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
return SerializeProtoToJniByteArray(env, initialize_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
- JNIEnv* env, jclass clazz, jlong native_pointer, jbyteArray schema_bytes,
- jboolean ignore_errors_and_delete_documents) {
+jbyteArray nativeSetSchema(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray schema_bytes,
+ jboolean ignore_errors_and_delete_documents) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SchemaProto schema_proto;
if (!ParseProtoFromJniByteArray(env, schema_bytes, &schema_proto)) {
- ICING_LOG(ERROR) << "Failed to parse SchemaProto in nativeSetSchema";
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse SchemaProto in nativeSetSchema";
return nullptr;
}
@@ -127,41 +126,36 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
return SerializeProtoToJniByteArray(env, set_schema_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+jbyteArray nativeGetSchema(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetSchemaResultProto get_schema_result_proto = icing->GetSchema();
return SerializeProtoToJniByteArray(env, get_schema_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) {
+jbyteArray nativeGetSchemaType(JNIEnv* env, jclass clazz, jobject object,
+ jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
- const char* native_schema_type =
- env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_schema_type_chars(env, schema_type);
icing::lib::GetSchemaTypeResultProto get_schema_type_result_proto =
- icing->GetSchemaType(native_schema_type);
+ icing->GetSchemaType(scoped_schema_type_chars.c_str());
return SerializeProtoToJniByteArray(env, get_schema_type_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativePut(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray document_bytes) {
+jbyteArray nativePut(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray document_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::DocumentProto document_proto;
if (!ParseProtoFromJniByteArray(env, document_bytes, &document_proto)) {
- ICING_LOG(ERROR) << "Failed to parse DocumentProto in nativePut";
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse DocumentProto in nativePut";
return nullptr;
}
@@ -171,127 +165,220 @@ Java_com_google_android_icing_IcingSearchEngine_nativePut(
return SerializeProtoToJniByteArray(env, put_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGet(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space,
- jstring uri) {
+jbyteArray nativeGet(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space, jstring uri,
+ jbyteArray result_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
- const char* native_name_space =
- env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
- const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr);
+ icing::lib::GetResultSpecProto get_result_spec;
+ if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &get_result_spec)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse GetResultSpecProto in nativeGet";
+ return nullptr;
+ }
+ icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space);
+ icing::lib::ScopedUtfChars scoped_uri_chars(env, uri);
icing::lib::GetResultProto get_result_proto =
- icing->Get(native_name_space, native_uri);
+ icing->Get(scoped_name_space_chars.c_str(), scoped_uri_chars.c_str(),
+ get_result_spec);
return SerializeProtoToJniByteArray(env, get_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray search_spec_bytes, jbyteArray scoring_spec_bytes,
- jbyteArray result_spec_bytes) {
+jbyteArray nativeReportUsage(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray usage_report_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
+
+ icing::lib::UsageReport usage_report;
+ if (!ParseProtoFromJniByteArray(env, usage_report_bytes, &usage_report)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse UsageReport in nativeReportUsage";
+ return nullptr;
+ }
+
+ icing::lib::ReportUsageResultProto report_usage_result_proto =
+ icing->ReportUsage(usage_report);
+
+ return SerializeProtoToJniByteArray(env, report_usage_result_proto);
+}
+
+jbyteArray nativeGetAllNamespaces(JNIEnv* env, jclass clazz, jobject object) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ icing::lib::GetAllNamespacesResultProto get_all_namespaces_result_proto =
+ icing->GetAllNamespaces();
+
+ return SerializeProtoToJniByteArray(env, get_all_namespaces_result_proto);
+}
+
+jbyteArray nativeGetNextPage(JNIEnv* env, jclass clazz, jobject object,
+ jlong next_page_token,
+ jlong java_to_native_start_timestamp_ms) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ const std::unique_ptr<const icing::lib::Clock> clock =
+ std::make_unique<icing::lib::Clock>();
+ int32_t java_to_native_jni_latency_ms =
+ clock->GetSystemTimeMilliseconds() - java_to_native_start_timestamp_ms;
+
+ icing::lib::SearchResultProto next_page_result_proto =
+ icing->GetNextPage(next_page_token);
+
+ icing::lib::QueryStatsProto* query_stats =
+ next_page_result_proto.mutable_query_stats();
+ query_stats->set_java_to_native_jni_latency_ms(java_to_native_jni_latency_ms);
+ query_stats->set_native_to_java_start_timestamp_ms(
+ clock->GetSystemTimeMilliseconds());
+
+ return SerializeProtoToJniByteArray(env, next_page_result_proto);
+}
+
+void nativeInvalidateNextPageToken(JNIEnv* env, jclass clazz, jobject object,
+ jlong next_page_token) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ icing->InvalidateNextPageToken(next_page_token);
+}
+
+jbyteArray nativeSearch(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray search_spec_bytes,
+ jbyteArray scoring_spec_bytes,
+ jbyteArray result_spec_bytes,
+ jlong java_to_native_start_timestamp_ms) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchSpecProto search_spec_proto;
if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) {
- ICING_LOG(ERROR) << "Failed to parse SearchSpecProto in nativeSearch";
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse SearchSpecProto in nativeSearch";
return nullptr;
}
icing::lib::ScoringSpecProto scoring_spec_proto;
if (!ParseProtoFromJniByteArray(env, scoring_spec_bytes,
&scoring_spec_proto)) {
- ICING_LOG(ERROR) << "Failed to parse ScoringSpecProto in nativeSearch";
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse ScoringSpecProto in nativeSearch";
return nullptr;
}
icing::lib::ResultSpecProto result_spec_proto;
if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &result_spec_proto)) {
- ICING_LOG(ERROR) << "Failed to parse ResultSpecProto in nativeSearch";
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse ResultSpecProto in nativeSearch";
return nullptr;
}
+ const std::unique_ptr<const icing::lib::Clock> clock =
+ std::make_unique<icing::lib::Clock>();
+ int32_t java_to_native_jni_latency_ms =
+ clock->GetSystemTimeMilliseconds() - java_to_native_start_timestamp_ms;
+
icing::lib::SearchResultProto search_result_proto =
icing->Search(search_spec_proto, scoring_spec_proto, result_spec_proto);
+ icing::lib::QueryStatsProto* query_stats =
+ search_result_proto.mutable_query_stats();
+ query_stats->set_java_to_native_jni_latency_ms(java_to_native_jni_latency_ms);
+ query_stats->set_native_to_java_start_timestamp_ms(
+ clock->GetSystemTimeMilliseconds());
+
return SerializeProtoToJniByteArray(env, search_result_proto);
}
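Worth spelling out the timing handshake used here and in nativeGetNextPage: the Java layer records a wall-clock start timestamp and passes it down, the native side subtracts it from its own clock to produce java_to_native_jni_latency_ms, and then stamps native_to_java_start_timestamp_ms so the Java layer can measure the return hop the same way. The scheme only yields sensible latencies when both layers read the same wall clock, which is presumably why system time in milliseconds is used on both sides.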
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDelete(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space,
- jstring uri) {
+jbyteArray nativeDelete(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space, jstring uri) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
- const char* native_name_space =
- env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
- const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space);
+ icing::lib::ScopedUtfChars scoped_uri_chars(env, uri);
icing::lib::DeleteResultProto delete_result_proto =
- icing->Delete(native_name_space, native_uri);
+ icing->Delete(scoped_name_space_chars.c_str(), scoped_uri_chars.c_str());
return SerializeProtoToJniByteArray(env, delete_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space) {
+jbyteArray nativeDeleteByNamespace(JNIEnv* env, jclass clazz, jobject object,
+ jstring name_space) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
- const char* native_name_space =
- env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_name_space_chars(env, name_space);
icing::lib::DeleteByNamespaceResultProto delete_by_namespace_result_proto =
- icing->DeleteByNamespace(native_name_space);
+ icing->DeleteByNamespace(scoped_name_space_chars.c_str());
return SerializeProtoToJniByteArray(env, delete_by_namespace_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) {
+jbyteArray nativeDeleteBySchemaType(JNIEnv* env, jclass clazz, jobject object,
+ jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
- const char* native_schema_type =
- env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
+ icing::lib::ScopedUtfChars scoped_schema_type_chars(env, schema_type);
icing::lib::DeleteBySchemaTypeResultProto delete_by_schema_type_result_proto =
- icing->DeleteBySchemaType(native_schema_type);
+ icing->DeleteBySchemaType(scoped_schema_type_chars.c_str());
return SerializeProtoToJniByteArray(env, delete_by_schema_type_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+jbyteArray nativeDeleteByQuery(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray search_spec_bytes,
+ jboolean return_deleted_document_info) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
+ icing::lib::SearchSpecProto search_spec_proto;
+ if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse SearchSpecProto in nativeSearch";
+ return nullptr;
+ }
+ icing::lib::DeleteByQueryResultProto delete_result_proto =
+ icing->DeleteByQuery(search_spec_proto, return_deleted_document_info);
+
+ return SerializeProtoToJniByteArray(env, delete_result_proto);
+}
+
+jbyteArray nativePersistToDisk(JNIEnv* env, jclass clazz, jobject object,
+ jint persist_type_code) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ if (!icing::lib::PersistType::Code_IsValid(persist_type_code)) {
+ ICING_LOG(icing::lib::ERROR)
+ << persist_type_code << " is an invalid value for PersistType::Code";
+ return nullptr;
+ }
+ icing::lib::PersistType::Code persist_type_code_enum =
+ static_cast<icing::lib::PersistType::Code>(persist_type_code);
icing::lib::PersistToDiskResultProto persist_to_disk_result_proto =
- icing->PersistToDisk();
+ icing->PersistToDisk(persist_type_code_enum);
return SerializeProtoToJniByteArray(env, persist_to_disk_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeOptimize(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+jbyteArray nativeOptimize(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::OptimizeResultProto optimize_result_proto = icing->Optimize();
return SerializeProtoToJniByteArray(env, optimize_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+jbyteArray nativeGetOptimizeInfo(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetOptimizeInfoResultProto get_optimize_info_result_proto =
icing->GetOptimizeInfo();
@@ -299,15 +386,188 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
return SerializeProtoToJniByteArray(env, get_optimize_info_result_proto);
}
-JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeReset(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+jbyteArray nativeGetStorageInfo(JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
+
+ icing::lib::StorageInfoResultProto storage_info_result_proto =
+ icing->GetStorageInfo();
+
+ return SerializeProtoToJniByteArray(env, storage_info_result_proto);
+}
+
+jbyteArray nativeReset(JNIEnv* env, jclass clazz, jobject object) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
icing::lib::ResetResultProto reset_result_proto = icing->Reset();
return SerializeProtoToJniByteArray(env, reset_result_proto);
}
+jbyteArray nativeSearchSuggestions(JNIEnv* env, jclass clazz, jobject object,
+ jbyteArray suggestion_spec_bytes) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ icing::lib::SuggestionSpecProto suggestion_spec_proto;
+ if (!ParseProtoFromJniByteArray(env, suggestion_spec_bytes,
+ &suggestion_spec_proto)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Failed to parse SuggestionSpecProto in nativeSearch";
+ return nullptr;
+ }
+  icing::lib::SuggestionResponse suggestion_response =
+      icing->SearchSuggestions(suggestion_spec_proto);
+
+  return SerializeProtoToJniByteArray(env, suggestion_response);
+}
+
+jbyteArray nativeGetDebugInfo(JNIEnv* env, jclass clazz, jobject object,
+ jint verbosity) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ if (!icing::lib::DebugInfoVerbosity::Code_IsValid(verbosity)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Invalid value for Debug Info verbosity: " << verbosity;
+ return nullptr;
+ }
+
+ icing::lib::DebugInfoResultProto debug_info_result_proto =
+ icing->GetDebugInfo(
+ static_cast<icing::lib::DebugInfoVerbosity::Code>(verbosity));
+
+ return SerializeProtoToJniByteArray(env, debug_info_result_proto);
+}
+
+jboolean nativeShouldLog(JNIEnv* env, jclass clazz, jshort severity,
+ jshort verbosity) {
+ if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Invalid value for logging severity: " << severity;
+ return false;
+ }
+ return icing::lib::ShouldLog(
+ static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
+}
+
+jboolean nativeSetLoggingLevel(JNIEnv* env, jclass clazz, jshort severity,
+ jshort verbosity) {
+ if (!icing::lib::LogSeverity::Code_IsValid(severity)) {
+ ICING_LOG(icing::lib::ERROR)
+ << "Invalid value for logging severity: " << severity;
+ return false;
+ }
+ return icing::lib::SetLoggingLevel(
+ static_cast<icing::lib::LogSeverity::Code>(severity), verbosity);
+}
+
+jstring nativeGetLoggingTag(JNIEnv* env, jclass clazz) {
+ return env->NewStringUTF(icing::lib::kIcingLoggingTag);
+}
+
+#pragma clang diagnostic ignored "-Wwrite-strings"
+jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+ JNIEnv* env;
+ if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
+ ICING_LOG(icing::lib::ERROR) << "ERROR: GetEnv failed";
+ return JNI_ERR;
+ }
+
+ // Find your class. JNI_OnLoad is called from the correct class loader context
+ // for this to work.
+ jclass java_class =
+ env->FindClass("com/google/android/icing/IcingSearchEngineImpl");
+ if (java_class == nullptr) {
+ return JNI_ERR;
+ }
+ JavaIcingSearchEngineImpl.native_pointer =
+ env->GetFieldID(java_class, "nativePointer", "J");
+
+ // Register your class' native methods.
+ static const JNINativeMethod methods[] = {
+ {"nativeCreate", "([B)J", reinterpret_cast<void*>(nativeCreate)},
+ {"nativeDestroy", "(Lcom/google/android/icing/IcingSearchEngineImpl;)V",
+ reinterpret_cast<void*>(nativeDestroy)},
+ {"nativeInitialize",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeInitialize)},
+ {"nativeSetSchema",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[BZ)[B",
+ reinterpret_cast<void*>(nativeSetSchema)},
+ {"nativeGetSchema",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetSchema)},
+ {"nativeGetSchemaType",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeGetSchemaType)},
+ {"nativePut", "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativePut)},
+ {"nativeGet",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/"
+ "String;Ljava/lang/String;[B)[B",
+ reinterpret_cast<void*>(nativeGet)},
+ {"nativeReportUsage",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativeReportUsage)},
+ {"nativeGetAllNamespaces",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetAllNamespaces)},
+ {"nativeGetNextPage",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;JJ)[B",
+ reinterpret_cast<void*>(nativeGetNextPage)},
+ {"nativeInvalidateNextPageToken",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;J)V",
+ reinterpret_cast<void*>(nativeInvalidateNextPageToken)},
+ {"nativeSearch",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B[B[BJ)[B",
+ reinterpret_cast<void*>(nativeSearch)},
+ {"nativeDelete",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/"
+ "String;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDelete)},
+ {"nativeDeleteByNamespace",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDeleteByNamespace)},
+ {"nativeDeleteBySchemaType",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;Ljava/lang/String;)[B",
+ reinterpret_cast<void*>(nativeDeleteBySchemaType)},
+ {"nativeDeleteByQuery",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[BZ)[B",
+ reinterpret_cast<void*>(nativeDeleteByQuery)},
+ {"nativePersistToDisk",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;I)[B",
+ reinterpret_cast<void*>(nativePersistToDisk)},
+ {"nativeOptimize", "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeOptimize)},
+ {"nativeGetOptimizeInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetOptimizeInfo)},
+ {"nativeGetStorageInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeGetStorageInfo)},
+ {"nativeReset", "(Lcom/google/android/icing/IcingSearchEngineImpl;)[B",
+ reinterpret_cast<void*>(nativeReset)},
+ {"nativeSearchSuggestions",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B",
+ reinterpret_cast<void*>(nativeSearchSuggestions)},
+ {"nativeGetDebugInfo",
+ "(Lcom/google/android/icing/IcingSearchEngineImpl;I)[B",
+ reinterpret_cast<void*>(nativeGetDebugInfo)},
+ {"nativeShouldLog", "(SS)Z", reinterpret_cast<void*>(nativeShouldLog)},
+ {"nativeSetLoggingLevel", "(SS)Z",
+ reinterpret_cast<void*>(nativeSetLoggingLevel)},
+ {"nativeGetLoggingTag", "()Ljava/lang/String;",
+ reinterpret_cast<void*>(nativeGetLoggingTag)},
+ };
+ int register_natives_success = env->RegisterNatives(
+ java_class, methods, sizeof(methods) / sizeof(JNINativeMethod));
+ if (register_natives_success != JNI_OK) {
+ return register_natives_success;
+ }
+
+ return JNI_VERSION_1_6;
+}
+
} // extern "C"
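The signature strings in the registration table above use standard JNI descriptor syntax. For example:

    ([B)J                  byte[] -> long
    (SS)Z                  (short, short) -> boolean
    ()Ljava/lang/String;   () -> String
    (Lcom/google/android/icing/IcingSearchEngineImpl;[B)[B
                           (impl object, byte[]) -> byte[]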
diff --git a/icing/jni/jni-cache.cc b/icing/jni/jni-cache.cc
index 58eb8bf..1804b9a 100644
--- a/icing/jni/jni-cache.cc
+++ b/icing/jni/jni-cache.cc
@@ -14,6 +14,8 @@
#include "icing/jni/jni-cache.h"
+#ifdef ICING_REVERSE_JNI_SEGMENTATION
+
#include "icing/text_classifier/lib3/utils/java/jni-base.h"
#include "icing/text_classifier/lib3/utils/java/jni-helper.h"
#include "icing/absl_ports/canonical_errors.h"
@@ -157,8 +159,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<JniCache>> JniCache::Create(
// BreakIteratorBatcher
ICING_GET_CLASS_OR_RETURN_NULL(
- breakiterator,
- "com/google/android/icing/BreakIteratorBatcher");
+ breakiterator, "com/google/android/icing/BreakIteratorBatcher");
ICING_GET_METHOD(breakiterator, constructor, "<init>",
"(Ljava/util/Locale;)V");
ICING_GET_METHOD(breakiterator, settext, "setText", "(Ljava/lang/String;)V");
@@ -214,3 +215,5 @@ JniCache::ConvertToJavaString(const char* utf8_text,
} // namespace lib
} // namespace icing
+
+#endif // ICING_REVERSE_JNI_SEGMENTATION
diff --git a/icing/jni/jni-cache.h b/icing/jni/jni-cache.h
index a5f16c7..3faaed6 100644
--- a/icing/jni/jni-cache.h
+++ b/icing/jni/jni-cache.h
@@ -15,6 +15,16 @@
#ifndef ICING_JNI_JNI_CACHE_H_
#define ICING_JNI_JNI_CACHE_H_
+#ifndef ICING_REVERSE_JNI_SEGMENTATION
+namespace icing {
+namespace lib {
+
+class JniCache {};  // Provide an empty class definition for non-Android builds.
+
+} // namespace lib
+} // namespace icing
+#else // ICING_REVERSE_JNI_SEGMENTATION
+
#include <jni.h>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -75,4 +85,6 @@ struct JniCache {
} // namespace lib
} // namespace icing
+#endif // !ICING_REVERSE_JNI_SEGMENTATION
+
#endif // ICING_JNI_JNI_CACHE_H_
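ICING_REVERSE_JNI_SEGMENTATION is an ordinary preprocessor symbol, so the real JniCache is only compiled when a build opts in to reverse-JNI segmentation. A sketch of how a build might enable it (assumed for illustration; the build wiring is not shown in this patch):

    -DICING_REVERSE_JNI_SEGMENTATION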
diff --git a/icing/jni/scoped-primitive-array-critical.h b/icing/jni/scoped-primitive-array-critical.h
new file mode 100644
index 0000000..062c145
--- /dev/null
+++ b/icing/jni/scoped-primitive-array-critical.h
@@ -0,0 +1,86 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_
+#define ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_
+
+#include <jni.h>
+
+#include <utility>
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class ScopedPrimitiveArrayCritical {
+ public:
+ ScopedPrimitiveArrayCritical(JNIEnv* env, jarray array)
+ : env_(env), array_(array) {
+ if (array_ == nullptr) {
+ array_critical_ = nullptr;
+ array_critical_size_ = 0;
+ } else {
+ array_critical_size_ = env->GetArrayLength(array);
+ array_critical_ = static_cast<T*>(
+ env->GetPrimitiveArrayCritical(array, /*isCopy=*/nullptr));
+ }
+ }
+
+ ScopedPrimitiveArrayCritical(ScopedPrimitiveArrayCritical&& rhs)
+ : env_(nullptr),
+ array_(nullptr),
+ array_critical_(nullptr),
+ array_critical_size_(0) {
+ Swap(rhs);
+ }
+
+ ScopedPrimitiveArrayCritical(const ScopedPrimitiveArrayCritical&) = delete;
+
+ ScopedPrimitiveArrayCritical& operator=(ScopedPrimitiveArrayCritical&& rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ ScopedPrimitiveArrayCritical& operator=(const ScopedPrimitiveArrayCritical&) =
+ delete;
+
+ ~ScopedPrimitiveArrayCritical() {
+ if (array_critical_ != nullptr && array_ != nullptr) {
+ env_->ReleasePrimitiveArrayCritical(array_, array_critical_, /*mode=*/0);
+ }
+ }
+
+ T* data() { return array_critical_; }
+ const T* data() const { return array_critical_; }
+
+ size_t size() const { return array_critical_size_; }
+
+ private:
+ void Swap(ScopedPrimitiveArrayCritical& other) {
+ std::swap(env_, other.env_);
+ std::swap(array_, other.array_);
+ std::swap(array_critical_, other.array_critical_);
+ std::swap(array_critical_size_, other.array_critical_size_);
+ }
+
+ JNIEnv* env_;
+ jarray array_;
+ T* array_critical_;
+ size_t array_critical_size_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JNI_SCOPED_PRIMITIVE_ARRAY_CRITICAL_H_
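One JNI constraint worth noting for this wrapper: between GetPrimitiveArrayCritical and ReleasePrimitiveArrayCritical the thread is in a critical section, so the enclosing code must not make other JNI calls or block. The RAII scope makes that window explicit; a minimal sketch, where proto is a hypothetical google::protobuf::MessageLite*:

    {
      icing::lib::ScopedPrimitiveArrayCritical<uint8_t> scoped(env, bytes);
      // No other JNI calls while scoped is alive.
      parsed = proto->ParseFromArray(scoped.data(), scoped.size());
    }  // ReleasePrimitiveArrayCritical runs here with mode 0.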
diff --git a/icing/jni/scoped-primitive-array-critical_test.cc b/icing/jni/scoped-primitive-array-critical_test.cc
new file mode 100644
index 0000000..3655378
--- /dev/null
+++ b/icing/jni/scoped-primitive-array-critical_test.cc
@@ -0,0 +1,140 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/jni/scoped-primitive-array-critical.h"
+
+#include <jni.h>
+
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "util/java/mock_jni_env.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsNull;
+using ::testing::Return;
+using util::java::test::MockJNIEnv;
+
+TEST(ScopedJniClassesTest, ScopedPrimitiveArrayNull) {
+ auto env_mock = std::make_unique<MockJNIEnv>();
+  // Construct a scoped primitive array with a null jarray.
+ ScopedPrimitiveArrayCritical<uint8_t> scoped_primitive_array(
+ env_mock.get(), /*array=*/nullptr);
+ EXPECT_THAT(scoped_primitive_array.data(), IsNull());
+ EXPECT_THAT(scoped_primitive_array.size(), Eq(0));
+
+  // Move construct a scoped primitive array.
+ ScopedPrimitiveArrayCritical<uint8_t> moved_scoped_primitive_array(
+ std::move(scoped_primitive_array));
+ EXPECT_THAT(moved_scoped_primitive_array.data(), IsNull());
+ EXPECT_THAT(moved_scoped_primitive_array.size(), Eq(0));
+
+  // Move assign a scoped primitive array.
+ ScopedPrimitiveArrayCritical<uint8_t> move_assigned_scoped_primitive_array =
+ std::move(moved_scoped_primitive_array);
+ EXPECT_THAT(move_assigned_scoped_primitive_array.data(), IsNull());
+ EXPECT_THAT(move_assigned_scoped_primitive_array.size(), Eq(0));
+}
+
+TEST(ScopedJniClassesTest, ScopedPrimitiveArrayConstruction) {
+ auto env_mock = std::make_unique<MockJNIEnv>();
+  // Construct a scoped primitive array normally.
+ jarray fake_jarray = reinterpret_cast<jarray>(-303);
+ uint8_t fake_array[] = {1, 8, 63, 90};
+ ON_CALL(*env_mock, GetPrimitiveArrayCritical(Eq(fake_jarray), IsNull()))
+ .WillByDefault(Return(fake_array));
+ ON_CALL(*env_mock, GetArrayLength(Eq(fake_jarray))).WillByDefault(Return(4));
+
+ ScopedPrimitiveArrayCritical<uint8_t> scoped_primitive_array(
+ env_mock.get(),
+ /*array=*/fake_jarray);
+ EXPECT_THAT(scoped_primitive_array.data(), Eq(fake_array));
+ EXPECT_THAT(scoped_primitive_array.size(), Eq(4));
+
+ EXPECT_CALL(*env_mock, ReleasePrimitiveArrayCritical(Eq(fake_jarray),
+ Eq(fake_array), Eq(0)))
+ .Times(1);
+}
+
+TEST(ScopedJniClassesTest, ScopedPrimitiveArrayMoveConstruction) {
+ auto env_mock = std::make_unique<MockJNIEnv>();
+  // Construct a scoped primitive array normally.
+ jarray fake_jarray = reinterpret_cast<jarray>(-303);
+ uint8_t fake_array[] = {1, 8, 63, 90};
+ ON_CALL(*env_mock, GetPrimitiveArrayCritical(Eq(fake_jarray), IsNull()))
+ .WillByDefault(Return(fake_array));
+ ON_CALL(*env_mock, GetArrayLength(Eq(fake_jarray))).WillByDefault(Return(4));
+
+ ScopedPrimitiveArrayCritical<uint8_t> scoped_primitive_array(
+ env_mock.get(),
+ /*array=*/fake_jarray);
+
+  // Move construct a scoped primitive array.
+ ScopedPrimitiveArrayCritical<uint8_t> moved_scoped_primitive_array(
+ std::move(scoped_primitive_array));
+ EXPECT_THAT(moved_scoped_primitive_array.data(), Eq(fake_array));
+ EXPECT_THAT(moved_scoped_primitive_array.size(), Eq(4));
+
+ EXPECT_CALL(*env_mock, ReleasePrimitiveArrayCritical(Eq(fake_jarray),
+ Eq(fake_array), Eq(0)))
+ .Times(1);
+}
+
+TEST(ScopedJniClassesTest, ScopedPrimitiveArrayMoveAssignment) {
+ // Setup the mock to return:
+  //   {1, 8, 63, 90} for jarray (-303)
+  //   {5, 9, 82} for jarray (-505)
+ auto env_mock = std::make_unique<MockJNIEnv>();
+ jarray fake_jarray1 = reinterpret_cast<jarray>(-303);
+ uint8_t fake_array1[] = {1, 8, 63, 90};
+ ON_CALL(*env_mock, GetPrimitiveArrayCritical(Eq(fake_jarray1), IsNull()))
+ .WillByDefault(Return(fake_array1));
+ ON_CALL(*env_mock, GetArrayLength(Eq(fake_jarray1))).WillByDefault(Return(4));
+
+ jarray fake_jarray2 = reinterpret_cast<jarray>(-505);
+ uint8_t fake_array2[] = {5, 9, 82};
+ ON_CALL(*env_mock, GetPrimitiveArrayCritical(Eq(fake_jarray2), IsNull()))
+ .WillByDefault(Return(fake_array2));
+ ON_CALL(*env_mock, GetArrayLength(Eq(fake_jarray2))).WillByDefault(Return(3));
+
+ ScopedPrimitiveArrayCritical<uint8_t> scoped_primitive_array1(
+ env_mock.get(),
+ /*array=*/fake_jarray1);
+ ScopedPrimitiveArrayCritical<uint8_t> scoped_primitive_array2(
+ env_mock.get(),
+ /*array=*/fake_jarray2);
+
+  // Move assign a scoped primitive array.
+ scoped_primitive_array2 = std::move(scoped_primitive_array1);
+ EXPECT_THAT(scoped_primitive_array2.data(), Eq(fake_array1));
+ EXPECT_THAT(scoped_primitive_array2.size(), Eq(4));
+
+ EXPECT_CALL(*env_mock, ReleasePrimitiveArrayCritical(Eq(fake_jarray1),
+ Eq(fake_array1), Eq(0)))
+ .Times(1);
+ EXPECT_CALL(*env_mock, ReleasePrimitiveArrayCritical(Eq(fake_jarray2),
+ Eq(fake_array2), Eq(0)))
+ .Times(1);
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/jni/scoped-utf-chars.h b/icing/jni/scoped-utf-chars.h
new file mode 100644
index 0000000..5a3ac6a
--- /dev/null
+++ b/icing/jni/scoped-utf-chars.h
@@ -0,0 +1,81 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JNI_SCOPED_UTF_CHARS_H_
+#define ICING_JNI_SCOPED_UTF_CHARS_H_
+
+#include <jni.h>
+
+#include <cstddef>
+#include <cstring>
+#include <utility>
+
+namespace icing {
+namespace lib {
+
+// An RAII class to manage access and allocation of a Java string's UTF chars.
+class ScopedUtfChars {
+ public:
+ ScopedUtfChars(JNIEnv* env, jstring s) : env_(env), string_(s) {
+ if (s == nullptr) {
+ utf_chars_ = nullptr;
+ size_ = 0;
+ } else {
+      utf_chars_ = env->GetStringUTFChars(s, /*isCopy=*/nullptr);
+      // GetStringUTFChars returns nullptr on failure; guard the strlen call
+      // so construction stays safe in that case.
+      size_ = (utf_chars_ != nullptr) ? strlen(utf_chars_) : 0;
+ }
+ }
+
+  // size_ is value-initialized so that Swap never hands rhs an indeterminate
+  // size.
+  ScopedUtfChars(ScopedUtfChars&& rhs)
+      : env_(nullptr), string_(nullptr), utf_chars_(nullptr), size_(0) {
+ Swap(rhs);
+ }
+
+ ScopedUtfChars(const ScopedUtfChars&) = delete;
+
+ ScopedUtfChars& operator=(ScopedUtfChars&& rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ ScopedUtfChars& operator=(const ScopedUtfChars&) = delete;
+
+ ~ScopedUtfChars() {
+ if (utf_chars_ != nullptr) {
+ env_->ReleaseStringUTFChars(string_, utf_chars_);
+ }
+ }
+
+ const char* c_str() const { return utf_chars_; }
+
+ size_t size() const { return size_; }
+
+ private:
+ void Swap(ScopedUtfChars& other) {
+ std::swap(env_, other.env_);
+ std::swap(string_, other.string_);
+ std::swap(utf_chars_, other.utf_chars_);
+ std::swap(size_, other.size_);
+ }
+
+ JNIEnv* env_;
+ jstring string_;
+ const char* utf_chars_;
+ size_t size_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JNI_SCOPED_UTF_CHARS_H_
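A minimal usage sketch, mirroring how the JNI entry points above consume jstring arguments (names illustrative):

    icing::lib::ScopedUtfChars scoped_name(env, name_space);
    if (scoped_name.c_str() == nullptr) {
      return nullptr;  // Null jstring or GetStringUTFChars failure.
    }
    // The UTF chars are released automatically when scoped_name goes out of
    // scope, replacing the manual Get/ReleaseStringUTFChars pairs above.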
diff --git a/icing/jni/scoped-utf-chars_test.cc b/icing/jni/scoped-utf-chars_test.cc
new file mode 100644
index 0000000..d249f69
--- /dev/null
+++ b/icing/jni/scoped-utf-chars_test.cc
@@ -0,0 +1,126 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/jni/scoped-utf-chars.h"
+
+#include <jni.h>
+
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "util/java/mock_jni_env.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsNull;
+using ::testing::Return;
+using util::java::test::MockJNIEnv;
+
+TEST(ScopedJniClassesTest, ScopedUtfCharsNull) {
+ auto env_mock = std::make_unique<MockJNIEnv>();
+  // Construct a scoped utf chars with a null jstring.
+ ScopedUtfChars scoped_utf_chars(env_mock.get(), /*s=*/nullptr);
+ EXPECT_THAT(scoped_utf_chars.c_str(), IsNull());
+ EXPECT_THAT(scoped_utf_chars.size(), Eq(0));
+
+ // Move construct a scoped utf chars
+ ScopedUtfChars moved_scoped_utf_chars(std::move(scoped_utf_chars));
+ EXPECT_THAT(moved_scoped_utf_chars.c_str(), IsNull());
+ EXPECT_THAT(moved_scoped_utf_chars.size(), Eq(0));
+
+ // Move assign a scoped utf chars
+ ScopedUtfChars move_assigned_scoped_utf_chars =
+ std::move(moved_scoped_utf_chars);
+ EXPECT_THAT(move_assigned_scoped_utf_chars.c_str(), IsNull());
+ EXPECT_THAT(move_assigned_scoped_utf_chars.size(), Eq(0));
+}
+
+TEST(ScopedJniClassesTest, ScopedUtfCharsConstruction) {
+ auto env_mock = std::make_unique<MockJNIEnv>();
+ // Construct a scoped utf chars normally.
+ jstring fake_jstring = reinterpret_cast<jstring>(-303);
+ std::string fake_string = "foo";
+ ON_CALL(*env_mock, GetStringUTFChars(Eq(fake_jstring), IsNull()))
+ .WillByDefault(Return(fake_string.c_str()));
+
+ ScopedUtfChars scoped_utf_chars(env_mock.get(), /*s=*/fake_jstring);
+ EXPECT_THAT(scoped_utf_chars.c_str(), Eq(fake_string.c_str()));
+ EXPECT_THAT(scoped_utf_chars.size(), Eq(3));
+
+ EXPECT_CALL(*env_mock,
+ ReleaseStringUTFChars(Eq(fake_jstring), Eq(fake_string.c_str())))
+ .Times(1);
+}
+
+TEST(ScopedJniClassesTest, ScopedUtfCharsMoveConstruction) {
+ auto env_mock = std::make_unique<MockJNIEnv>();
+ // Construct a scoped utf chars normally.
+ jstring fake_jstring = reinterpret_cast<jstring>(-303);
+ std::string fake_string = "foo";
+ ON_CALL(*env_mock, GetStringUTFChars(Eq(fake_jstring), IsNull()))
+ .WillByDefault(Return(fake_string.c_str()));
+
+ ScopedUtfChars scoped_utf_chars(env_mock.get(), /*s=*/fake_jstring);
+
+ // Move construct a scoped utf chars
+ ScopedUtfChars moved_scoped_utf_chars(std::move(scoped_utf_chars));
+ EXPECT_THAT(moved_scoped_utf_chars.c_str(), Eq(fake_string.c_str()));
+ EXPECT_THAT(moved_scoped_utf_chars.size(), Eq(3));
+
+ EXPECT_CALL(*env_mock,
+ ReleaseStringUTFChars(Eq(fake_jstring), Eq(fake_string.c_str())))
+ .Times(1);
+}
+
+TEST(ScopedJniClassesTest, ScopedUtfCharsMoveAssignment) {
+ // Setup the mock to return:
+ // "foo" for jstring (-303)
+ // "bar baz" for jstring (-505)
+ auto env_mock = std::make_unique<MockJNIEnv>();
+ jstring fake_jstring1 = reinterpret_cast<jstring>(-303);
+ std::string fake_string1 = "foo";
+ ON_CALL(*env_mock, GetStringUTFChars(Eq(fake_jstring1), IsNull()))
+ .WillByDefault(Return(fake_string1.c_str()));
+
+ jstring fake_jstring2 = reinterpret_cast<jstring>(-505);
+ std::string fake_string2 = "bar baz";
+ ON_CALL(*env_mock, GetStringUTFChars(Eq(fake_jstring2), IsNull()))
+ .WillByDefault(Return(fake_string2.c_str()));
+
+ ScopedUtfChars scoped_utf_chars1(env_mock.get(), /*s=*/fake_jstring1);
+ ScopedUtfChars scoped_utf_chars2(env_mock.get(), /*s=*/fake_jstring2);
+
+ // Move assign a scoped utf chars
+ scoped_utf_chars2 = std::move(scoped_utf_chars1);
+ EXPECT_THAT(scoped_utf_chars2.c_str(), Eq(fake_string1.c_str()));
+ EXPECT_THAT(scoped_utf_chars2.size(), Eq(3));
+
+ EXPECT_CALL(*env_mock, ReleaseStringUTFChars(Eq(fake_jstring1),
+ Eq(fake_string1.c_str())))
+ .Times(1);
+ EXPECT_CALL(*env_mock, ReleaseStringUTFChars(Eq(fake_jstring2),
+ Eq(fake_string2.c_str())))
+ .Times(1);
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/aggregation-scorer.cc b/icing/join/aggregation-scorer.cc
new file mode 100644
index 0000000..3dee3dd
--- /dev/null
+++ b/icing/join/aggregation-scorer.cc
@@ -0,0 +1,139 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/aggregation-scorer.h"
+
+#include <algorithm>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "icing/proto/search.pb.h"
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+class CountAggregationScorer : public AggregationScorer {
+ public:
+ double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) override {
+ return children.size();
+ }
+};
+
+class MinAggregationScorer : public AggregationScorer {
+ public:
+ double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) override {
+ if (children.empty()) {
+      // Return 0 if there is no child document. Negative child scores are
+      // considered "worse than" 0, so returning 0 here ranks an empty child
+      // set above child sets whose scores are all negative.
+ return 0.0;
+ }
+ return std::min_element(children.begin(), children.end(),
+ [](const ScoredDocumentHit& lhs,
+ const ScoredDocumentHit& rhs) -> bool {
+ return lhs.score() < rhs.score();
+ })
+ ->score();
+ }
+};
+
+class AverageAggregationScorer : public AggregationScorer {
+ public:
+ double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) override {
+ if (children.empty()) {
+      // Return 0 if there is no child document. Negative child scores are
+      // considered "worse than" 0, so returning 0 here ranks an empty child
+      // set above child sets whose scores are all negative.
+ return 0.0;
+ }
+    // std::accumulate rather than std::reduce: this binary op is
+    // heterogeneous and order-dependent, which std::reduce does not permit.
+    return std::accumulate(
+               children.begin(), children.end(), 0.0,
+               [](double prev, const ScoredDocumentHit& item) -> double {
+                 return prev + item.score();
+               }) /
+           children.size();
+ }
+};
+
+class MaxAggregationScorer : public AggregationScorer {
+ public:
+ double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) override {
+ if (children.empty()) {
+      // Return 0 if there is no child document. Negative child scores are
+      // considered "worse than" 0, so returning 0 here ranks an empty child
+      // set above child sets whose scores are all negative.
+ return 0.0;
+ }
+ return std::max_element(children.begin(), children.end(),
+ [](const ScoredDocumentHit& lhs,
+ const ScoredDocumentHit& rhs) -> bool {
+ return lhs.score() < rhs.score();
+ })
+ ->score();
+ }
+};
+
+class SumAggregationScorer : public AggregationScorer {
+ public:
+ double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) override {
+    // As in AverageAggregationScorer, std::accumulate is the right algorithm
+    // for this heterogeneous, order-dependent fold.
+    return std::accumulate(
+        children.begin(), children.end(), 0.0,
+        [](double prev, const ScoredDocumentHit& item) -> double {
+          return prev + item.score();
+        });
+ }
+};
+
+class DefaultAggregationScorer : public AggregationScorer {
+ public:
+ double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) override {
+ return parent.score();
+ }
+};
+
+std::unique_ptr<AggregationScorer> AggregationScorer::Create(
+ const JoinSpecProto& join_spec) {
+ switch (join_spec.aggregation_scoring_strategy()) {
+ case JoinSpecProto::AggregationScoringStrategy::COUNT:
+ return std::make_unique<CountAggregationScorer>();
+ case JoinSpecProto::AggregationScoringStrategy::MIN:
+ return std::make_unique<MinAggregationScorer>();
+ case JoinSpecProto::AggregationScoringStrategy::AVG:
+ return std::make_unique<AverageAggregationScorer>();
+ case JoinSpecProto::AggregationScoringStrategy::MAX:
+ return std::make_unique<MaxAggregationScorer>();
+ case JoinSpecProto::AggregationScoringStrategy::SUM:
+ return std::make_unique<SumAggregationScorer>();
+ case JoinSpecProto::AggregationScoringStrategy::NONE:
+ // No aggregation strategy means using parent document score, so fall
+ // through to return DefaultAggregationScorer.
+ [[fallthrough]];
+ default:
+ return std::make_unique<DefaultAggregationScorer>();
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/aggregation-scorer.h b/icing/join/aggregation-scorer.h
new file mode 100644
index 0000000..3d38cf0
--- /dev/null
+++ b/icing/join/aggregation-scorer.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_AGGREGATION_SCORER_H_
+#define ICING_JOIN_AGGREGATION_SCORER_H_
+
+#include <memory>
+#include <vector>
+
+#include "icing/proto/search.pb.h"
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+class AggregationScorer {
+ public:
+ static std::unique_ptr<AggregationScorer> Create(
+ const JoinSpecProto& join_spec);
+
+ virtual ~AggregationScorer() = default;
+
+ virtual double GetScore(const ScoredDocumentHit& parent,
+ const std::vector<ScoredDocumentHit>& children) = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_AGGREGATION_SCORER_H_
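A minimal sketch of the factory in use, with values borrowed from the tests below; parent_hit and child_hits are assumed to be pre-built ScoredDocumentHits. Under AVG, child scores {8, 3, 1, 4, 7} yield (8 + 3 + 1 + 4 + 7) / 5 = 4.6 regardless of the parent score:

    JoinSpecProto join_spec;
    join_spec.set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::AVG);
    std::unique_ptr<AggregationScorer> scorer =
        AggregationScorer::Create(join_spec);
    double score = scorer->GetScore(parent_hit, child_hits);  // 4.6 here.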
diff --git a/icing/join/aggregation-scorer_test.cc b/icing/join/aggregation-scorer_test.cc
new file mode 100644
index 0000000..19a7239
--- /dev/null
+++ b/icing/join/aggregation-scorer_test.cc
@@ -0,0 +1,215 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/aggregation-scorer.h"
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::DoubleEq;
+
+struct AggregationScorerTestParam {
+ double ans;
+ JoinSpecProto::AggregationScoringStrategy::Code scoring_strategy;
+ double parent_score;
+ std::vector<double> child_scores;
+
+ explicit AggregationScorerTestParam(
+ double ans_in,
+ JoinSpecProto::AggregationScoringStrategy::Code scoring_strategy_in,
+ double parent_score_in, std::vector<double> child_scores_in)
+ : ans(ans_in),
+ scoring_strategy(scoring_strategy_in),
+        parent_score(parent_score_in),
+ child_scores(std::move(child_scores_in)) {}
+};
+
+class AggregationScorerTest
+ : public ::testing::TestWithParam<AggregationScorerTestParam> {};
+
+TEST_P(AggregationScorerTest, GetScore) {
+ static constexpr DocumentId kDefaultDocumentId = 0;
+
+ const AggregationScorerTestParam& param = GetParam();
+ // Test AggregationScorer by creating some ScoredDocumentHits for parent and
+ // child documents. DocumentId and SectionIdMask won't affect the aggregation
+ // score calculation, so just simply set default values.
+ // Parent document
+ ScoredDocumentHit parent_scored_document_hit(
+ kDefaultDocumentId, kSectionIdMaskNone, param.parent_score);
+ // Child documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits;
+ child_scored_document_hits.reserve(param.child_scores.size());
+ std::transform(param.child_scores.cbegin(), param.child_scores.cend(),
+ std::back_inserter(child_scored_document_hits),
+ [](double score) -> ScoredDocumentHit {
+ return ScoredDocumentHit(kDefaultDocumentId,
+ kSectionIdMaskNone, score);
+ });
+
+ JoinSpecProto join_spec;
+ join_spec.set_aggregation_scoring_strategy(param.scoring_strategy);
+ std::unique_ptr<AggregationScorer> scorer =
+ AggregationScorer::Create(join_spec);
+ EXPECT_THAT(
+ scorer->GetScore(parent_scored_document_hit, child_scored_document_hits),
+ DoubleEq(param.ans));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ CountAggregationScorerTest, AggregationScorerTest,
+ testing::Values(
+ // General case.
+ AggregationScorerTestParam(
+ /*ans_in=*/5, JoinSpecProto::AggregationScoringStrategy::COUNT,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{8, 3, 1, 4, 7}),
+ // Only one child.
+ AggregationScorerTestParam(
+ /*ans_in=*/1, JoinSpecProto::AggregationScoringStrategy::COUNT,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{123}),
+ // No child.
+ AggregationScorerTestParam(
+ /*ans_in=*/0, JoinSpecProto::AggregationScoringStrategy::COUNT,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{})));
+
+INSTANTIATE_TEST_SUITE_P(
+ MinAggregationScorerTest, AggregationScorerTest,
+ testing::Values(
+ // General case.
+ AggregationScorerTestParam(
+ /*ans_in=*/1, JoinSpecProto::AggregationScoringStrategy::MIN,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{8, 3, 1, 4, 7}),
+ // Only one child, greater than parent.
+ AggregationScorerTestParam(
+ /*ans_in=*/123, JoinSpecProto::AggregationScoringStrategy::MIN,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{123}),
+ // Only one child, smaller than parent.
+ AggregationScorerTestParam(
+ /*ans_in=*/50, JoinSpecProto::AggregationScoringStrategy::MIN,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{50}),
+ // No child.
+ AggregationScorerTestParam(
+ /*ans_in=*/0, JoinSpecProto::AggregationScoringStrategy::MIN,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{})));
+
+INSTANTIATE_TEST_SUITE_P(
+ AverageAggregationScorerTest, AggregationScorerTest,
+ testing::Values(
+ // General case.
+ AggregationScorerTestParam(
+ /*ans_in=*/4.6, JoinSpecProto::AggregationScoringStrategy::AVG,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{8, 3, 1, 4, 7}),
+ // Only one child.
+ AggregationScorerTestParam(
+ /*ans_in=*/123, JoinSpecProto::AggregationScoringStrategy::AVG,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{123}),
+ // No child.
+ AggregationScorerTestParam(
+ /*ans_in=*/0, JoinSpecProto::AggregationScoringStrategy::AVG,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{})));
+
+INSTANTIATE_TEST_SUITE_P(
+ MaxAggregationScorerTest, AggregationScorerTest,
+ testing::Values(
+ // General case.
+ AggregationScorerTestParam(
+ /*ans_in=*/8, JoinSpecProto::AggregationScoringStrategy::MAX,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{8, 3, 1, 4, 7}),
+ // Only one child, greater than parent.
+ AggregationScorerTestParam(
+ /*ans_in=*/123, JoinSpecProto::AggregationScoringStrategy::MAX,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{123}),
+ // Only one child, smaller than parent.
+ AggregationScorerTestParam(
+ /*ans_in=*/50, JoinSpecProto::AggregationScoringStrategy::MAX,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{50}),
+ // No child.
+ AggregationScorerTestParam(
+ /*ans_in=*/0, JoinSpecProto::AggregationScoringStrategy::MAX,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{})));
+
+INSTANTIATE_TEST_SUITE_P(
+ SumAggregationScorerTest, AggregationScorerTest,
+ testing::Values(
+ // General case.
+ AggregationScorerTestParam(
+ /*ans_in=*/23, JoinSpecProto::AggregationScoringStrategy::SUM,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{8, 3, 1, 4, 7}),
+ // Only one child.
+ AggregationScorerTestParam(
+ /*ans_in=*/123, JoinSpecProto::AggregationScoringStrategy::SUM,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{123}),
+ // No child.
+ AggregationScorerTestParam(
+ /*ans_in=*/0, JoinSpecProto::AggregationScoringStrategy::SUM,
+ /*parent_score_in=*/0,
+ /*child_scores_in=*/{})));
+
+INSTANTIATE_TEST_SUITE_P(
+ DefaultAggregationScorerTest, AggregationScorerTest,
+ testing::Values(
+ // General case.
+ AggregationScorerTestParam(
+ /*ans_in=*/98, JoinSpecProto::AggregationScoringStrategy::NONE,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{8, 3, 1, 4, 7}),
+ // Only one child, greater than parent.
+ AggregationScorerTestParam(
+ /*ans_in=*/98, JoinSpecProto::AggregationScoringStrategy::NONE,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{123}),
+ // Only one child, smaller than parent.
+ AggregationScorerTestParam(
+ /*ans_in=*/98, JoinSpecProto::AggregationScoringStrategy::NONE,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{50}),
+ // No child.
+ AggregationScorerTestParam(
+ /*ans_in=*/98, JoinSpecProto::AggregationScoringStrategy::NONE,
+ /*parent_score_in=*/98,
+ /*child_scores_in=*/{})));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/doc-join-info.cc b/icing/join/doc-join-info.cc
new file mode 100644
index 0000000..3b06f01
--- /dev/null
+++ b/icing/join/doc-join-info.cc
@@ -0,0 +1,49 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/doc-join-info.h"
+
+#include <cstdint>
+
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+#include "icing/util/bit-util.h"
+
+namespace icing {
+namespace lib {
+
+DocJoinInfo::DocJoinInfo(DocumentId document_id,
+ JoinablePropertyId joinable_property_id) {
+ Value temp_value = 0;
+ bit_util::BitfieldSet(/*new_value=*/document_id,
+ /*lsb_offset=*/kJoinablePropertyIdBits,
+ /*len=*/kDocumentIdBits, &temp_value);
+ bit_util::BitfieldSet(/*new_value=*/joinable_property_id,
+ /*lsb_offset=*/0,
+ /*len=*/kJoinablePropertyIdBits, &temp_value);
+ value_ = temp_value;
+}
+
+DocumentId DocJoinInfo::document_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/kJoinablePropertyIdBits,
+ /*len=*/kDocumentIdBits);
+}
+
+JoinablePropertyId DocJoinInfo::joinable_property_id() const {
+ return bit_util::BitfieldGet(value_, /*lsb_offset=*/0,
+ /*len=*/kJoinablePropertyIdBits);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/doc-join-info.h b/icing/join/doc-join-info.h
new file mode 100644
index 0000000..7696b92
--- /dev/null
+++ b/icing/join/doc-join-info.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_DOC_JOIN_INFO
+#define ICING_JOIN_DOC_JOIN_INFO
+
+#include <cstdint>
+#include <limits>
+
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// DocJoinInfo is composed of document_id and joinable_property_id.
+class DocJoinInfo {
+ public:
+ // The datatype used to encode DocJoinInfo information: the document_id and
+ // joinable_property_id.
+ using Value = uint32_t;
+
+ static_assert(kDocumentIdBits + kJoinablePropertyIdBits <= sizeof(Value) * 8,
+ "Cannot encode document id and joinable property id in "
+ "DocJoinInfo::Value");
+
+ // All bits of kInvalidValue are 1, and it contains:
+  // - 0b1111 for the 4 unused bits.
+ // - kInvalidDocumentId (2^22-1).
+ // - JoinablePropertyId 2^6-1 (valid), which is ok because kInvalidDocumentId
+ // has already invalidated the value. In fact, we currently use all 2^6
+ // joinable property ids and there is no "invalid joinable property id", so
+ // it doesn't matter what JoinablePropertyId we set for kInvalidValue.
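+  //
+  // For example (illustrative): with 22 document id bits and 6 joinable
+  // property id bits, kInvalidValue = 0xFFFFFFFF decodes to document_id =
+  // 0x3FFFFF (kInvalidDocumentId) and joinable_property_id = 0x3F.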
+ static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
+
+ explicit DocJoinInfo(DocumentId document_id,
+ JoinablePropertyId joinable_property_id);
+
+ explicit DocJoinInfo(Value value = kInvalidValue) : value_(value) {}
+
+ bool is_valid() const { return value_ != kInvalidValue; }
+ Value value() const { return value_; }
+ DocumentId document_id() const;
+ JoinablePropertyId joinable_property_id() const;
+
+ private:
+ // Value bits layout: 4 unused + 22 document_id + 6 joinable_property_id.
+ Value value_;
+} __attribute__((packed));
+static_assert(sizeof(DocJoinInfo) == 4, "");
+
+} // namespace lib
+} // namespace icing
+
+#endif  // ICING_JOIN_DOC_JOIN_INFO_H_
diff --git a/icing/join/doc-join-info_test.cc b/icing/join/doc-join-info_test.cc
new file mode 100644
index 0000000..7025473
--- /dev/null
+++ b/icing/join/doc-join-info_test.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/doc-join-info.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+static constexpr DocumentId kSomeDocumentId = 24;
+static constexpr JoinablePropertyId kSomeJoinablePropertyId = 5;
+
+TEST(DocJoinInfoTest, Accessors) {
+ DocJoinInfo doc_join_info(kSomeDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(doc_join_info.document_id(), Eq(kSomeDocumentId));
+ EXPECT_THAT(doc_join_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+}
+
+TEST(DocJoinInfoTest, Invalid) {
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(default_invalid.is_valid(), IsFalse());
+
+ // Also make sure the invalid DocJoinInfo contains an invalid document id.
+ EXPECT_THAT(default_invalid.document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(default_invalid.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+}
+
+TEST(DocJoinInfoTest, Valid) {
+ DocJoinInfo maximum_document_id_info(kMaxDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(maximum_document_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(maximum_document_id_info.document_id(), Eq(kMaxDocumentId));
+ EXPECT_THAT(maximum_document_id_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+
+ DocJoinInfo maximum_joinable_property_id_info(kSomeDocumentId,
+ kMaxJoinablePropertyId);
+ EXPECT_THAT(maximum_joinable_property_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(maximum_joinable_property_id_info.document_id(),
+ Eq(kSomeDocumentId));
+ EXPECT_THAT(maximum_joinable_property_id_info.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+
+ DocJoinInfo minimum_document_id_info(kMinDocumentId, kSomeJoinablePropertyId);
+ EXPECT_THAT(minimum_document_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(minimum_document_id_info.document_id(), Eq(kMinDocumentId));
+ EXPECT_THAT(minimum_document_id_info.joinable_property_id(),
+ Eq(kSomeJoinablePropertyId));
+
+ DocJoinInfo minimum_joinable_property_id_info(kSomeDocumentId,
+ kMinJoinablePropertyId);
+ EXPECT_THAT(minimum_joinable_property_id_info.is_valid(), IsTrue());
+ EXPECT_THAT(minimum_joinable_property_id_info.document_id(),
+ Eq(kSomeDocumentId));
+ EXPECT_THAT(minimum_joinable_property_id_info.joinable_property_id(),
+ Eq(kMinJoinablePropertyId));
+
+ DocJoinInfo all_maximum_info(kMaxDocumentId, kMaxJoinablePropertyId);
+ EXPECT_THAT(all_maximum_info.is_valid(), IsTrue());
+ EXPECT_THAT(all_maximum_info.document_id(), Eq(kMaxDocumentId));
+ EXPECT_THAT(all_maximum_info.joinable_property_id(),
+ Eq(kMaxJoinablePropertyId));
+
+ DocJoinInfo all_minimum_info(kMinDocumentId, kMinJoinablePropertyId);
+ EXPECT_THAT(all_minimum_info.is_valid(), IsTrue());
+ EXPECT_THAT(all_minimum_info.document_id(), Eq(kMinDocumentId));
+ EXPECT_THAT(all_minimum_info.joinable_property_id(),
+ Eq(kMinJoinablePropertyId));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/document-id-to-join-info.h b/icing/join/document-id-to-join-info.h
new file mode 100644
index 0000000..dee4885
--- /dev/null
+++ b/icing/join/document-id-to-join-info.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
+#define ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
+
+#include <utility>
+
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// DocumentIdToJoinInfo is composed of document_id and its join info.
+// - QualifiedId join: join info is the referenced document's namespace_id +
+// fingerprint(uri).
+// - String join: join info is the term id.
+// - Integer join: join info is the integer.
+//
+// DocumentIdToJoinInfo instances are stored in posting lists.
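+//
+// A minimal usage sketch (illustrative values; integer join shown):
+//   DocumentIdToJoinInfo<int64_t> info(/*document_id=*/3, /*join_info=*/42);
+//   info.document_id();  // returns 3
+//   info.join_info();    // returns 42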
+template <typename JoinInfoType>
+class DocumentIdToJoinInfo {
+ public:
+ static DocumentIdToJoinInfo<JoinInfoType> GetInvalid() {
+ return DocumentIdToJoinInfo<JoinInfoType>(kInvalidDocumentId,
+ JoinInfoType());
+ }
+
+ explicit DocumentIdToJoinInfo(DocumentId document_id, JoinInfoType join_info)
+ : document_id_(document_id), join_info_(std::move(join_info)) {}
+
+ DocumentId document_id() const { return document_id_; }
+ const JoinInfoType& join_info() const { return join_info_; }
+
+ bool is_valid() const { return IsDocumentIdValid(document_id_); }
+
+ bool operator<(const DocumentIdToJoinInfo<JoinInfoType>& other) const {
+ if (document_id_ != other.document_id_) {
+ return document_id_ < other.document_id_;
+ }
+ return join_info_ < other.join_info_;
+ }
+
+ bool operator==(const DocumentIdToJoinInfo<JoinInfoType>& other) const {
+ return document_id_ == other.document_id_ && join_info_ == other.join_info_;
+ }
+
+ private:
+ DocumentId document_id_;
+ JoinInfoType join_info_;
+} __attribute__((packed));
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
diff --git a/icing/join/join-children-fetcher.cc b/icing/join/join-children-fetcher.cc
new file mode 100644
index 0000000..c6d1b97
--- /dev/null
+++ b/icing/join/join-children-fetcher.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-children-fetcher.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>>
+JoinChildrenFetcher::GetChildren(DocumentId parent_doc_id) const {
+ if (join_spec_.parent_property_expression() == kQualifiedIdExpr) {
+ if (auto iter = map_joinable_qualified_id_.find(parent_doc_id);
+ iter != map_joinable_qualified_id_.end()) {
+ return iter->second;
+ }
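+    // Note: parent documents without any children still join successfully;
+    // they simply get an empty child list (left-join semantics).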
+ return std::vector<ScoredDocumentHit>();
+ }
+  // TODO(b/256022027): So far we only support kQualifiedIdExpr for
+  // parent_property_expression; we could support more in the future.
+ return absl_ports::UnimplementedError(absl_ports::StrCat(
+ "Parent property expression must be ", kQualifiedIdExpr));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-children-fetcher.h b/icing/join/join-children-fetcher.h
new file mode 100644
index 0000000..1b875bc
--- /dev/null
+++ b/icing/join/join-children-fetcher.h
@@ -0,0 +1,73 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
+#define ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
+
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// A class that provides the GetChildren method for joins to fetch all child
+// documents given a parent document id.
+//
+// Internally, the class maintains a map for each joinable value type that
+// groups children according to the joinable values. Currently we only support
+// QUALIFIED_ID joining, in which the joinable value type is document id.
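+//
+// A minimal usage sketch (child_hit1, child_hit2 are hypothetical
+// ScoredDocumentHits):
+//   std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>> children;
+//   children[/*parent_doc_id=*/0] = {child_hit1, child_hit2};
+//   JoinChildrenFetcher fetcher(join_spec, std::move(children));
+//   auto children_or = fetcher.GetChildren(/*parent_doc_id=*/0);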
+class JoinChildrenFetcher {
+ public:
+ explicit JoinChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>&&
+ map_joinable_qualified_id)
+ : join_spec_(join_spec),
+ map_joinable_qualified_id_(std::move(map_joinable_qualified_id)) {}
+
+  // Gets a vector of child ScoredDocumentHits for the given parent document
+  // id.
+  //
+  // TODO(b/256022027): Implement property value joins for string and int
+  // types. In these cases, GetChildren should look up the join index to fetch
+  // the joinable property value of the given parent_doc_id according to
+  // join_spec_.parent_property_expression, and then fetch children from the
+  // corresponding map in this class using that joinable property value.
+ //
+ // Returns:
+ // The vector of results on success.
+ // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not
+ // supported.
+ libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>> GetChildren(
+ DocumentId parent_doc_id) const;
+
+ private:
+ static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()";
+
+ const JoinSpecProto& join_spec_; // Does not own!
+
+  // The map that groups children by the parent document id referenced in
+  // their qualified id property; used to support QualifiedId joining. The
+  // join key type is document id.
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
diff --git a/icing/join/join-children-fetcher_test.cc b/icing/join/join-children-fetcher_test.cc
new file mode 100644
index 0000000..92a7a81
--- /dev/null
+++ b/icing/join/join-children-fetcher_test.cc
@@ -0,0 +1,83 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-children-fetcher.h"
+
+#include <unordered_map>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/join/join-processor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+TEST(JoinChildrenFetcherTest, FetchQualifiedIdJoinChildren) {
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id;
+ DocumentId parent_doc_id = 0;
+ ScoredDocumentHit child1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1.0);
+ ScoredDocumentHit child2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/2.0);
+ map_joinable_qualified_id[parent_doc_id].push_back(child1);
+ map_joinable_qualified_id[parent_doc_id].push_back(child2);
+
+ JoinChildrenFetcher fetcher(join_spec, std::move(map_joinable_qualified_id));
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<ScoredDocumentHit> children,
+ fetcher.GetChildren(parent_doc_id));
+ EXPECT_THAT(children, ElementsAre(EqualsScoredDocumentHit(child1),
+ EqualsScoredDocumentHit(child2)));
+}
+
+TEST(JoinChildrenFetcherTest, FetchJoinEmptyChildren) {
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+
+ DocumentId parent_doc_id = 0;
+
+ JoinChildrenFetcher fetcher(join_spec, /*map_joinable_qualified_id=*/{});
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<ScoredDocumentHit> children,
+ fetcher.GetChildren(parent_doc_id));
+ EXPECT_THAT(children, IsEmpty());
+}
+
+TEST(JoinChildrenFetcherTest, UnsupportedJoin) {
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression("name");
+ join_spec.set_child_property_expression("sender");
+ JoinChildrenFetcher fetcher(join_spec, /*map_joinable_qualified_id=*/{});
+ EXPECT_THAT(fetcher.GetChildren(0),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
new file mode 100644
index 0000000..1b7ca0d
--- /dev/null
+++ b/icing/join/join-processor.cc
@@ -0,0 +1,270 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-processor.h"
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/join/aggregation-scorer.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<JoinChildrenFetcher>
+JoinProcessor::GetChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+ if (join_spec.parent_property_expression() != kQualifiedIdExpr) {
+    // TODO(b/256022027): So far we only support kQualifiedIdExpr for
+    // parent_property_expression; we could support more in the future.
+ return absl_ports::UnimplementedError(absl_ports::StrCat(
+ "Parent property expression must be ", kQualifiedIdExpr));
+ }
+
+ ScoredDocumentHitComparator score_comparator(
+ /*is_descending=*/join_spec.nested_spec().scoring_spec().order_by() ==
+ ScoringSpecProto::Order::DESC);
+
+ if (qualified_id_join_index_->is_v2()) {
+ // v2
+ // Step 1a: sort child ScoredDocumentHits in document id descending order.
+ std::sort(child_scored_document_hits.begin(),
+ child_scored_document_hits.end(),
+ [](const ScoredDocumentHit& lhs, const ScoredDocumentHit& rhs) {
+ return lhs.document_id() > rhs.document_id();
+ });
+
+ // Step 1b: group all child ScoredDocumentHits by the document's
+ // schema_type_id.
+ std::unordered_map<SchemaTypeId, std::vector<ScoredDocumentHit>>
+ schema_to_child_scored_doc_hits_map;
+ for (const ScoredDocumentHit& child_scored_document_hit :
+ child_scored_document_hits) {
+ std::optional<DocumentFilterData> child_doc_filter_data =
+ doc_store_->GetAliveDocumentFilterData(
+ child_scored_document_hit.document_id(), current_time_ms_);
+ if (!child_doc_filter_data) {
+ continue;
+ }
+
+ schema_to_child_scored_doc_hits_map[child_doc_filter_data
+ ->schema_type_id()]
+ .push_back(child_scored_document_hit);
+ }
+
+    // Step 1c: for each schema_type_id, look up QualifiedIdJoinIndexImplV2 to
+ // fetch all child join data from posting list(s). Convert all
+ // child join data to referenced parent document ids and bucketize
+ // child ScoredDocumentHits by it.
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ parent_to_child_docs_map;
+ for (auto& [schema_type_id, grouped_child_scored_doc_hits] :
+ schema_to_child_scored_doc_hits_map) {
+ // Get joinable_property_id of this schema.
+ ICING_ASSIGN_OR_RETURN(
+ const JoinablePropertyMetadata* metadata,
+ schema_store_->GetJoinablePropertyMetadata(
+ schema_type_id, join_spec.child_property_expression()));
+ if (metadata == nullptr ||
+ metadata->value_type != JoinableConfig::ValueType::QUALIFIED_ID) {
+ // Currently we only support qualified id, so skip other types.
+ continue;
+ }
+
+      // Look up QualifiedIdJoinIndexImplV2.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase>
+ join_index_iter,
+ qualified_id_join_index_->GetIterator(
+ schema_type_id, /*joinable_property_id=*/metadata->id));
+
+      // - The join index contains all join data of schema_type_id, and
+      //   join_index_iter will return all of them in (child) document id
+      //   descending order.
+      // - But we only need join data for child document ids that appear in
+      //   grouped_child_scored_doc_hits. Also, grouped_child_scored_doc_hits
+      //   contains ScoredDocumentHits in (child) document id descending order.
+      // - Therefore, we advance the 2 iterators in lockstep to intersect them
+      //   and get the desired join data.
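+      //
+      // For example (illustrative): if the join index yields child ids
+      // [9, 7, 4] and grouped_child_scored_doc_hits holds ids [9, 5, 4], the
+      // intersection matches child ids 9 and 4 in a single pass.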
+ auto child_scored_doc_hits_iter = grouped_child_scored_doc_hits.cbegin();
+ while (join_index_iter->Advance().ok() &&
+ child_scored_doc_hits_iter !=
+ grouped_child_scored_doc_hits.cend()) {
+ // Advance child_scored_doc_hits_iter until it points to a
+ // ScoredDocumentHit with document id <= the one pointed by
+ // join_index_iter.
+ while (child_scored_doc_hits_iter !=
+ grouped_child_scored_doc_hits.cend() &&
+ child_scored_doc_hits_iter->document_id() >
+ join_index_iter->GetCurrent().document_id()) {
+ ++child_scored_doc_hits_iter;
+ }
+
+ if (child_scored_doc_hits_iter !=
+ grouped_child_scored_doc_hits.cend() &&
+ child_scored_doc_hits_iter->document_id() ==
+ join_index_iter->GetCurrent().document_id()) {
+ // We get a join data whose child document id exists in both join
+ // index and grouped_child_scored_doc_hits. Convert its join info to
+ // referenced parent document ids and bucketize ScoredDocumentHits by
+ // it (putting into parent_to_child_docs_map).
+ const NamespaceFingerprintIdentifier& ref_ns_id =
+ join_index_iter->GetCurrent().join_info();
+ libtextclassifier3::StatusOr<DocumentId> ref_parent_doc_id_or =
+ doc_store_->GetDocumentId(ref_ns_id);
+ if (ref_parent_doc_id_or.ok()) {
+ parent_to_child_docs_map[std::move(ref_parent_doc_id_or)
+ .ValueOrDie()]
+ .push_back(*child_scored_doc_hits_iter);
+ }
+ }
+ }
+ }
+
+ // Step 1d: finally, sort each parent's joined child ScoredDocumentHits by
+ // score.
+ for (auto& [parent_doc_id, bucketized_child_scored_hits] :
+ parent_to_child_docs_map) {
+ std::sort(bucketized_child_scored_hits.begin(),
+ bucketized_child_scored_hits.end(), score_comparator);
+ }
+
+ return JoinChildrenFetcher(join_spec, std::move(parent_to_child_docs_map));
+ }
+
+ // v1
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ std::sort(child_scored_document_hits.begin(),
+ child_scored_document_hits.end(), score_comparator);
+
+ // Step 1: group child documents by parent documentId. Currently we only
+ // support QualifiedId joining, so fetch the qualified id content of
+ // child_property_expression, break it down into namespace + uri, and
+  //         look up the DocumentId.
+ // The keys of this map are the DocumentIds of the parent docs the child
+ // ScoredDocumentHits refer to. The values in this map are vectors of child
+ // ScoredDocumentHits that refer to a parent DocumentId.
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id;
+ for (const ScoredDocumentHit& child : child_scored_document_hits) {
+ ICING_ASSIGN_OR_RETURN(
+ DocumentId ref_doc_id,
+ FetchReferencedQualifiedId(child.document_id(),
+ join_spec.child_property_expression()));
+ if (ref_doc_id == kInvalidDocumentId) {
+ continue;
+ }
+
+ map_joinable_qualified_id[ref_doc_id].push_back(child);
+ }
+ return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id));
+}
+
+libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
+JoinProcessor::Join(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
+ const JoinChildrenFetcher& join_children_fetcher) {
+ std::unique_ptr<AggregationScorer> aggregation_scorer =
+ AggregationScorer::Create(join_spec);
+
+ std::vector<JoinedScoredDocumentHit> joined_scored_document_hits;
+ joined_scored_document_hits.reserve(parent_scored_document_hits.size());
+
+ // Step 2: iterate through all parent documentIds and construct
+ // JoinedScoredDocumentHit for each by looking up
+ // join_children_fetcher.
+ for (ScoredDocumentHit& parent : parent_scored_document_hits) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<ScoredDocumentHit> children,
+ join_children_fetcher.GetChildren(parent.document_id()));
+
+ double final_score = aggregation_scorer->GetScore(parent, children);
+ joined_scored_document_hits.emplace_back(final_score, std::move(parent),
+ std::move(children));
+ }
+
+ return joined_scored_document_hits;
+}
+
+libtextclassifier3::StatusOr<DocumentId>
+JoinProcessor::FetchReferencedQualifiedId(
+ const DocumentId& document_id, const std::string& property_path) const {
+ std::optional<DocumentFilterData> filter_data =
+ doc_store_->GetAliveDocumentFilterData(document_id, current_time_ms_);
+ if (!filter_data) {
+ return kInvalidDocumentId;
+ }
+
+ ICING_ASSIGN_OR_RETURN(const JoinablePropertyMetadata* metadata,
+ schema_store_->GetJoinablePropertyMetadata(
+ filter_data->schema_type_id(), property_path));
+ if (metadata == nullptr ||
+ metadata->value_type != JoinableConfig::ValueType::QUALIFIED_ID) {
+ // Currently we only support qualified id.
+ return kInvalidDocumentId;
+ }
+
+ DocJoinInfo info(document_id, metadata->id);
+ libtextclassifier3::StatusOr<std::string_view> ref_qualified_id_str_or =
+ qualified_id_join_index_->Get(info);
+ if (!ref_qualified_id_str_or.ok()) {
+ if (absl_ports::IsNotFound(ref_qualified_id_str_or.status())) {
+ return kInvalidDocumentId;
+ }
+ return std::move(ref_qualified_id_str_or).status();
+ }
+
+ libtextclassifier3::StatusOr<QualifiedId> ref_qualified_id_or =
+ QualifiedId::Parse(std::move(ref_qualified_id_str_or).ValueOrDie());
+ if (!ref_qualified_id_or.ok()) {
+    // This shouldn't happen because we validated the string during indexing
+    // and only put valid qualified id strings into the qualified id join
+    // index.
+ return kInvalidDocumentId;
+ }
+ QualifiedId qualified_id = std::move(ref_qualified_id_or).ValueOrDie();
+
+ libtextclassifier3::StatusOr<DocumentId> ref_document_id_or =
+ doc_store_->GetDocumentId(qualified_id.name_space(), qualified_id.uri());
+ if (!ref_document_id_or.ok()) {
+ return kInvalidDocumentId;
+ }
+ return std::move(ref_document_id_or).ValueOrDie();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-processor.h b/icing/join/join-processor.h
new file mode 100644
index 0000000..517e9db
--- /dev/null
+++ b/icing/join/join-processor.h
@@ -0,0 +1,88 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_JOIN_PROCESSOR_H_
+#define ICING_JOIN_JOIN_PROCESSOR_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
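+// Provides the join functionality: groups child documents under their parent
+// documents (GetChildrenFetcher) and constructs JoinedScoredDocumentHits
+// (Join) according to a JoinSpecProto.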
+class JoinProcessor {
+ public:
+ static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()";
+
+ explicit JoinProcessor(const DocumentStore* doc_store,
+ const SchemaStore* schema_store,
+ const QualifiedIdJoinIndex* qualified_id_join_index,
+ int64_t current_time_ms)
+ : doc_store_(doc_store),
+ schema_store_(schema_store),
+ qualified_id_join_index_(qualified_id_join_index),
+ current_time_ms_(current_time_ms) {}
+
+  // Gets a JoinChildrenFetcher used to fetch all child documents of a given
+  // parent document id.
+ //
+ // Returns:
+ // A JoinChildrenFetcher instance on success.
+ // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not
+ // supported.
+ libtextclassifier3::StatusOr<JoinChildrenFetcher> GetChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& child_scored_document_hits);
+
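+  // Joins each parent ScoredDocumentHit with its children (looked up via
+  // join_children_fetcher) into a JoinedScoredDocumentHit, computing the
+  // final score with the aggregation scoring strategy in join_spec. Parents
+  // without children are kept (left join).
+  //
+  // Returns:
+  //   The vector of JoinedScoredDocumentHits on success.
+  //   Any JoinChildrenFetcher::GetChildren errors.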
+ libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
+ const JoinChildrenFetcher& join_children_fetcher);
+
+ private:
+  // Fetches the referenced document id of the given document under the given
+  // property path.
+  //
+  // TODO(b/256022027): validate that the joinable property (and its
+  //   upper-level properties) do not have REPEATED cardinality.
+  //
+  // Returns:
+  //   - A valid referenced document id on success
+  //   - kInvalidDocumentId if the given document is not found, doesn't have a
+  //     qualified id joinable type for the given property_path, or doesn't
+  //     have a joinable value (an optional property)
+  //   - Any other QualifiedIdJoinIndex errors
+ libtextclassifier3::StatusOr<DocumentId> FetchReferencedQualifiedId(
+ const DocumentId& document_id, const std::string& property_path) const;
+
+ const DocumentStore* doc_store_; // Does not own.
+ const SchemaStore* schema_store_; // Does not own.
+ const QualifiedIdJoinIndex* qualified_id_join_index_; // Does not own.
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_JOIN_PROCESSOR_H_
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
new file mode 100644
index 0000000..a40d934
--- /dev/null
+++ b/icing/join/join-processor_test.cc
@@ -0,0 +1,930 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-processor.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsTrue;
+
+// TODO(b/275121148): remove template after deprecating
+// QualifiedIdJoinIndexImplV1.
+template <typename T>
+class JoinProcessorTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing_join_processor_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()),
+ IsTrue());
+
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ doc_store_dir_ = test_dir_ + "/doc_store";
+ qualified_id_join_index_dir_ = test_dir_ + "/qualified_id_join_index";
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("receiver")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/true, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ CreateQualifiedIdJoinIndex<T>());
+ }
+
+ void TearDown() override {
+ qualified_id_join_index_.reset();
+ doc_store_.reset();
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ template <typename UnknownJoinIndexType>
+ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+ CreateQualifiedIdJoinIndex() {
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+ CreateQualifiedIdJoinIndex<QualifiedIdJoinIndexImplV1>() {
+ return QualifiedIdJoinIndexImplV1::Create(
+ filesystem_, qualified_id_join_index_dir_, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false);
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+ CreateQualifiedIdJoinIndex<QualifiedIdJoinIndexImplV2>() {
+ return QualifiedIdJoinIndexImplV2::Create(filesystem_,
+ qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false);
+ }
+
+ libtextclassifier3::StatusOr<DocumentId> PutAndIndexDocument(
+ const DocumentProto& document) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id, doc_store_->Put(document));
+ ICING_ASSIGN_OR_RETURN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr));
+ return document_id;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit> parent_scored_document_hits,
+ std::vector<ScoredDocumentHit> child_scored_document_hits) {
+ JoinProcessor join_processor(
+ doc_store_.get(), schema_store_.get(), qualified_id_join_index_.get(),
+ /*current_time_ms=*/fake_clock_.GetSystemTimeMilliseconds());
+ ICING_ASSIGN_OR_RETURN(
+ JoinChildrenFetcher join_children_fetcher,
+ join_processor.GetChildrenFetcher(
+ join_spec, std::move(child_scored_document_hits)));
+ return join_processor.Join(join_spec,
+ std::move(parent_scored_document_hits),
+ join_children_fetcher);
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+ std::string qualified_id_join_index_dir_;
+
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+
+ FakeClock fake_clock_;
+};
+
+using TestTypes =
+ ::testing::Types<QualifiedIdJoinIndexImplV1, QualifiedIdJoinIndexImplV2>;
+TYPED_TEST_SUITE(JoinProcessorTest, TestTypes);
+
+TYPED_TEST(JoinProcessorTest, JoinByQualifiedId_allDocuments) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+ DocumentProto person2 = DocumentBuilder()
+ .SetKey(R"(pkg$db/name#space\\)", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Bob")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("sender",
+ R"(pkg$db/name\#space\\\\#person2)") // escaped
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ this->PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ this->PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ this->PutAndIndexDocument(email3));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/3.0);
+ ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
+ /*score=*/4.0);
+ ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+ /*score=*/5.0);
+
+ // Parent ScoredDocumentHits: all Person documents
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit2, scored_doc_hit1};
+
+ // Child ScoredDocumentHits: all Email documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {
+ scored_doc_hit5, scored_doc_hit4, scored_doc_hit3};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+ EXPECT_THAT(
+ joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit2,
+ /*child_scored_document_hits=*/{scored_doc_hit4})),
+ EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/2.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/
+ {scored_doc_hit5, scored_doc_hit3}))));
+}
+
+TYPED_TEST(JoinProcessorTest, JoinByQualifiedId_partialDocuments) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+ DocumentProto person2 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Bob")
+ .Build();
+ DocumentProto person3 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Eve")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("sender", "pkg$db/namespace#person2")
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("sender", "pkg$db/namespace#person3")
+ .Build();
+ DocumentProto email4 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 4")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK(/*document_id2 unused*/
+ this->PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(person3));
+ ICING_ASSERT_OK(/*document_id4 unused*/
+ this->PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ this->PutAndIndexDocument(email2));
+ ICING_ASSERT_OK(/*document_id6 unused*/
+ this->PutAndIndexDocument(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id7,
+ this->PutAndIndexDocument(email4));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+ /*score=*/4.0);
+ ScoredDocumentHit scored_doc_hit7(document_id7, kSectionIdMaskNone,
+ /*score=*/5.0);
+
+ // Only join person1, person3, email2 and email4.
+ // Parent ScoredDocumentHits: person1, person3
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit3, scored_doc_hit1};
+
+ // Child ScoredDocumentHits: email2, email4
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit7,
+ scored_doc_hit5};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+ EXPECT_THAT(
+ joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/0.0,
+ /*parent_scored_document_hit=*/scored_doc_hit3,
+ /*child_scored_document_hits=*/{})),
+ EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/{scored_doc_hit7}))));
+}
+
+TYPED_TEST(JoinProcessorTest,
+ ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email2 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ this->PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(email2));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/5.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/6.0);
+
+ // Parent ScoredDocumentHits: all Person documents
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit1};
+
+ // Child ScoredDocumentHits: all Email documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit2,
+ scored_doc_hit3};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+  // Since email2 doesn't have a "sender" property, it should be ignored.
+ EXPECT_THAT(
+ joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0, /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/{scored_doc_hit2}))));
+}
+
+TYPED_TEST(JoinProcessorTest,
+ ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty(
+ "sender",
+ "pkg$db/namespace#person2") // qualified id is invalid since
+ // person2 doesn't exist.
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("sender",
+ R"(pkg$db/namespace\#person1)") // invalid format
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ this->PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ this->PutAndIndexDocument(email3));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
+ /*score=*/0.0);
+
+ // Parent ScoredDocumentHits: all Person documents
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit1};
+
+ // Child ScoredDocumentHits: all Email documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {
+ scored_doc_hit2, scored_doc_hit3, scored_doc_hit4};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+  // Email 2 (document id 3) references a nonexistent document and email 3
+  // (document id 4) has a malformed qualified id, so the join processor
+  // should ignore both.
+ EXPECT_THAT(joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/{scored_doc_hit2}))));
+}
+
+TYPED_TEST(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+ DocumentProto person2 = DocumentBuilder()
+ .SetKey(R"(pkg$db/name#space\\)", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Bob")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender",
+ R"(pkg$db/name\#space\\\\#person2)") // escaped
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ this->PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(email1));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/3.0);
+
+ // Parent ScoredDocumentHits: all Person documents
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit2, scored_doc_hit1};
+
+ // Child ScoredDocumentHits: all Email documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit3};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+  // Person1 has no child documents, but the left join should still include
+  // it.
+ EXPECT_THAT(
+ joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit2,
+ /*child_scored_document_hits=*/{scored_doc_hit3})),
+ EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/0.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/{}))));
+}
+
+TYPED_TEST(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ this->PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ this->PutAndIndexDocument(email3));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/2.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/5.0);
+ ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
+ /*score=*/3.0);
+
+ // Parent ScoredDocumentHits: all Person documents
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit1};
+
+ // Child ScoredDocumentHits: all Email documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {
+ scored_doc_hit2, scored_doc_hit3, scored_doc_hit4};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+ // Child documents should be sorted according to the (nested) ranking
+ // strategy.
+ EXPECT_THAT(
+ joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/3.0, /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/
+ {scored_doc_hit3, scored_doc_hit4, scored_doc_hit2}))));
+}
+
+TYPED_TEST(JoinProcessorTest, ShouldAllowSelfJoining) {
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#email1")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(email1));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+
+  // Parent ScoredDocumentHits: email1, which references itself via "sender".
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit1};
+
+ // Child ScoredDocumentHits: all Email documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit1};
+
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+ this->Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
+ EXPECT_THAT(joined_result_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/{scored_doc_hit1}))));
+}
+
+TYPED_TEST(JoinProcessorTest, MultipleChildSchemasJoining) {
+ DocumentProto person1 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Alice")
+ .Build();
+ DocumentProto person2 = DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("Name", "Bob")
+ .Build();
+
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person2")
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .Build();
+ DocumentProto message1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "message1")
+ .SetSchema("Message")
+ .AddStringProperty("content", "test content 1")
+ .AddStringProperty("sender", "pkg$db/namespace#person1")
+ .AddStringProperty("receiver", "pkg$db/namespace#person2")
+ .Build();
+ DocumentProto message2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "message2")
+ .SetSchema("Message")
+ .AddStringProperty("content", "test content 2")
+ .AddStringProperty("sender", "pkg$db/namespace#person2")
+ .AddStringProperty("receiver", "pkg$db/namespace#person1")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ this->PutAndIndexDocument(person1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ this->PutAndIndexDocument(person2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ this->PutAndIndexDocument(email1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ this->PutAndIndexDocument(email2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ this->PutAndIndexDocument(email3));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ this->PutAndIndexDocument(message1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id7,
+ this->PutAndIndexDocument(message2));
+
+ ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/0.0);
+ ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/5.0);
+ ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
+ /*score=*/3.0);
+ ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+ /*score=*/2.0);
+ ScoredDocumentHit scored_doc_hit6(document_id6, kSectionIdMaskNone,
+ /*score=*/4.0);
+ ScoredDocumentHit scored_doc_hit7(document_id7, kSectionIdMaskNone,
+ /*score=*/1.0);
+
+ // Parent ScoredDocumentHits: all Person documents
+ std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+ scored_doc_hit1, scored_doc_hit2};
+
+ // Child ScoredDocumentHits: all Email and Message documents
+ std::vector<ScoredDocumentHit> child_scored_document_hits = {
+ scored_doc_hit3, scored_doc_hit4, scored_doc_hit5, scored_doc_hit6,
+ scored_doc_hit7};
+
+ // Join by "sender".
+ // - Person1: [
+ // email2 (scored_doc_hit4),
+ // email3 (scored_doc_hit5),
+ // message1 (scored_doc_hit6),
+ // ]
+ // - Person2: [
+ // email1 (scored_doc_hit3),
+ // message2 (scored_doc_hit7),
+ // ]
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec.set_child_property_expression("sender");
+ join_spec.set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+ ScoringSpecProto::Order::DESC);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits1,
+ this->Join(join_spec, parent_scored_document_hits,
+ child_scored_document_hits));
+ EXPECT_THAT(
+ joined_result_document_hits1,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/3.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/
+ {scored_doc_hit6, scored_doc_hit4, scored_doc_hit5})),
+ EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/2.0,
+ /*parent_scored_document_hit=*/scored_doc_hit2,
+ /*child_scored_document_hits=*/
+ {scored_doc_hit3, scored_doc_hit7}))));
+
+ // Join by "receiver".
+ // - Person1: [
+ // message2 (scored_doc_hit7),
+ // ]
+ // - Person2: [
+ // message1 (scored_doc_hit6),
+ // ]
+ join_spec.set_child_property_expression("receiver");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<JoinedScoredDocumentHit> joined_result_document_hits2,
+ this->Join(join_spec, parent_scored_document_hits,
+ child_scored_document_hits));
+ EXPECT_THAT(
+ joined_result_document_hits2,
+ ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit1,
+ /*child_scored_document_hits=*/{scored_doc_hit7})),
+ EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+ /*final_score=*/1.0,
+ /*parent_scored_document_hit=*/scored_doc_hit2,
+ /*child_scored_document_hits=*/{scored_doc_hit6}))));
+}
+
+// TODO(b/256022027): add unit tests for non-joinable property. If joinable
+//                       value type is unset, then qualified id join should not
+// include the child document even if it contains a valid
+// qualified id string.
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/posting-list-join-data-accessor.h b/icing/join/posting-list-join-data-accessor.h
new file mode 100644
index 0000000..6669f9f
--- /dev/null
+++ b/icing/join/posting-list-join-data-accessor.h
@@ -0,0 +1,211 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
+#define ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// This class provides a simple abstraction for adding join data to posting
+// lists. PostingListJoinDataAccessor handles:
+// 1) selection of properly-sized posting lists for the accumulated join index
+// data during Finalize()
+// 2) chaining of max-sized posting lists.
+template <typename JoinDataType>
+class PostingListJoinDataAccessor : public PostingListAccessor {
+ public:
+ // Creates an empty PostingListJoinDataAccessor.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListJoinDataAccessor
+ // - INVALID_ARGUMENT error if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+ Create(FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer);
+
+ // Creates a PostingListJoinDataAccessor with an existing posting list
+ // identified by existing_posting_list_id.
+ //
+ // RETURNS:
+ // - On success, a valid instance of PostingListJoinDataAccessor
+ // - INVALID_ARGUMENT if storage has an invalid block_size.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+ CreateFromExisting(FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer,
+ PostingListIdentifier existing_posting_list_id);
+
+ PostingListSerializer* GetSerializer() override { return serializer_; }
+
+ // Retrieves the next batch of data in the posting list chain.
+ //
+ // RETURNS:
+ // - On success, a vector of join data in the posting list chain
+ // - FAILED_PRECONDITION_ERROR if called on an instance that was created via
+ // Create.
+ // - INTERNAL_ERROR if unable to read the next posting list in the chain or
+ // if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetNextDataBatch();
+
+ // Prepends one data. This may result in flushing the posting list to disk (if
+ // the PostingListJoinDataAccessor holds a max-sized posting list that is
+  // full) or freeing a pre-existing posting list if it is too small to fit
+  // all of the necessary data.
+ //
+ // RETURNS:
+ // - OK, on success
+  //   - INVALID_ARGUMENT if !data.is_valid() or if data is smaller than the
+  //     most recently added data.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+ // posting list.
+ libtextclassifier3::Status PrependData(const JoinDataType& data);
+
+ private:
+ explicit PostingListJoinDataAccessor(
+ FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
+ PostingListJoinDataSerializer<JoinDataType>* serializer)
+ : PostingListAccessor(storage, std::move(in_memory_posting_list)),
+ serializer_(serializer) {}
+
+ PostingListJoinDataSerializer<JoinDataType>* serializer_; // Does not own.
+};
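+
+// A minimal usage sketch (illustrative only, not part of this change): the
+// typical write path pairs Create() with PrependData() and Finalize(). The
+// `storage`, `serializer`, and `sorted_data` names below are assumptions for
+// the example.
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> accessor,
+//       PostingListJoinDataAccessor<JoinDataType>::Create(&storage,
+//                                                         &serializer));
+//   for (const JoinDataType& data : sorted_data) {  // ascending order
+//     ICING_RETURN_IF_ERROR(accessor->PrependData(data));
+//   }
+//   PostingListAccessor::FinalizeResult result =
+//       std::move(*accessor).Finalize();
+//   ICING_RETURN_IF_ERROR(result.status);
+//   // result.id identifies the (possibly chained) posting list on disk.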
+
+template <typename JoinDataType>
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+PostingListJoinDataAccessor<JoinDataType>::Create(
+ FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer) {
+ uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+ storage->block_size(), serializer->GetDataTypeBytes());
+ ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+ PostingListUsed::CreateFromUnitializedRegion(
+ serializer, max_posting_list_bytes));
+ return std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>(
+ new PostingListJoinDataAccessor<JoinDataType>(
+ storage, std::move(in_memory_posting_list), serializer));
+}
+
+template <typename JoinDataType>
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ FlashIndexStorage* storage,
+ PostingListJoinDataSerializer<JoinDataType>* serializer,
+ PostingListIdentifier existing_posting_list_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ Create(storage, serializer));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage->GetPostingList(existing_posting_list_id));
+ pl_accessor->preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ return pl_accessor;
+}
+
+// Returns the next batch of join data for the provided posting list.
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<std::vector<JoinDataType>>
+PostingListJoinDataAccessor<JoinDataType>::GetNextDataBatch() {
+ if (preexisting_posting_list_ == nullptr) {
+ if (has_reached_posting_list_chain_end_) {
+ return std::vector<JoinDataType>();
+ }
+ return absl_ports::FailedPreconditionError(
+ "Cannot retrieve data from a PostingListJoinDataAccessor that was not "
+ "created from a preexisting posting list.");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<JoinDataType> batch,
+ serializer_->GetData(&preexisting_posting_list_->posting_list));
+ uint32_t next_block_index = kInvalidBlockIndex;
+ // Posting lists will only be chained when they are max-sized, in which case
+ // next_block_index will point to the next block for the next posting list.
+ // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+ // to the next free list block, which is not relevant here.
+ if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+ storage_->max_posting_list_bytes()) {
+ next_block_index = preexisting_posting_list_->next_block_index;
+ }
+
+ if (next_block_index != kInvalidBlockIndex) {
+    // Since we only have to deal with the next block of a max-sized posting
+    // list, max_num_posting_lists is 1 and posting_list_index_bits is
+ // BitsToStore(1).
+ PostingListIdentifier next_posting_list_id(
+ next_block_index, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/BitsToStore(1));
+ ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+ storage_->GetPostingList(next_posting_list_id));
+ preexisting_posting_list_ =
+ std::make_unique<PostingListHolder>(std::move(holder));
+ } else {
+ has_reached_posting_list_chain_end_ = true;
+ preexisting_posting_list_.reset();
+ }
+ return batch;
+}
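+
+// A companion read sketch (illustrative only): draining a posting list chain
+// created earlier, given its PostingListIdentifier `id`. All names other than
+// the accessor API are assumptions for the example. GetNextDataBatch returns
+// an empty vector once the chain end has been reached.
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> accessor,
+//       PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+//           &storage, &serializer, id));
+//   std::vector<JoinDataType> all_data;
+//   ICING_ASSIGN_OR_RETURN(std::vector<JoinDataType> batch,
+//                          accessor->GetNextDataBatch());
+//   while (!batch.empty()) {
+//     all_data.insert(all_data.end(), batch.begin(), batch.end());
+//     ICING_ASSIGN_OR_RETURN(batch, accessor->GetNextDataBatch());
+//   }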
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataAccessor<JoinDataType>::PrependData(
+ const JoinDataType& data) {
+ PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
+ ? preexisting_posting_list_->posting_list
+ : in_memory_posting_list_;
+ libtextclassifier3::Status status =
+ serializer_->PrependData(&active_pl, data);
+ if (!absl_ports::IsResourceExhausted(status)) {
+ return status;
+ }
+  // There is no more room to add data to the current posting list. Therefore,
+ // we need to either move those data to a larger posting list or flush this
+ // posting list and create another max-sized posting list in the chain.
+ if (preexisting_posting_list_ != nullptr) {
+ ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+ } else {
+ ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+ }
+
+ // Re-add data. Should always fit since we just cleared
+ // in_memory_posting_list_. It's fine to explicitly reference
+ // in_memory_posting_list_ here because there's no way of reaching this line
+ // while preexisting_posting_list_ is still in use.
+ return serializer_->PrependData(&in_memory_posting_list_, data);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
diff --git a/icing/join/posting-list-join-data-accessor_test.cc b/icing/join/posting-list-join-data-accessor_test.cc
new file mode 100644
index 0000000..ddc2d32
--- /dev/null
+++ b/icing/join/posting-list-join-data-accessor_test.cc
@@ -0,0 +1,435 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/posting-list-join-data-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+using JoinDataType = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+static constexpr NamespaceId kDefaultNamespaceId = 1;
+
+class PostingListJoinDataAccessorTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/test_dir";
+ file_name_ = test_dir_ + "/test_file.idx.index";
+
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+ serializer_ =
+ std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+ }
+
+ void TearDown() override {
+ flash_index_storage_.reset();
+ serializer_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string file_name_;
+ std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>> serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+};
+
+std::vector<JoinDataType> CreateData(int num_data, DocumentId start_document_id,
+ NamespaceId ref_namespace_id,
+ uint64_t start_ref_hash_uri) {
+ std::vector<JoinDataType> data;
+ data.reserve(num_data);
+ for (int i = 0; i < num_data; ++i) {
+ data.push_back(JoinDataType(
+ start_document_id,
+ NamespaceFingerprintIdentifier(ref_namespace_id,
+ /*fingerprint=*/start_ref_hash_uri)));
+
+ ++start_document_id;
+ ++start_ref_hash_uri;
+ }
+ return data;
+}
+
+TEST_F(PostingListJoinDataAccessorTest, DataAddAndRetrieveProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Add some join data
+ std::vector<JoinDataType> data_vec =
+ CreateData(/*num_data=*/5, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId,
+ /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec) {
+ EXPECT_THAT(pl_accessor->PrependData(data), IsOk());
+ }
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result.status, IsOk());
+ EXPECT_THAT(result.id.block_index(), Eq(1));
+ EXPECT_THAT(result.id.posting_list_index(), Eq(0));
+
+ // Retrieve some data.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result.id));
+ EXPECT_THAT(
+ serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+ EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, PreexistingPLKeepOnSameBlock) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Add a single data. This will fit in a min-sized posting list.
+ JoinDataType data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/123));
+ ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ // Should be allocated to the first block.
+ ASSERT_THAT(result1.id.block_index(), Eq(1));
+ ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+  // Add one more data. A minimum-sized posting list must be able to fit two
+  // data, so this should NOT cause the previous posting list to be
+  // reallocated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ JoinDataType data2(
+ /*document_id=*/2,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/456));
+ ICING_ASSERT_OK(pl_accessor->PrependData(data2));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ // Should be in the same posting list.
+ EXPECT_THAT(result2.id, Eq(result1.id));
+
+ // The posting list at result2.id should hold all of the data that have been
+ // added.
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAre(data2, data1)));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, PreexistingPLReallocateToLargerPL) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+  // Adding 3 data should cause Finalize to allocate a 56-byte posting list,
+ // which can store at most 4 data.
+ std::vector<JoinDataType> data_vec1 =
+ CreateData(/*num_data=*/3, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId,
+ /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec1) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ // Should be allocated to the first block.
+ ASSERT_THAT(result1.id.block_index(), Eq(1));
+ ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+ // Now add more data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ // The current posting list can fit 1 more data. Adding 12 more data should
+  // result in these data being moved to a larger posting list. The total size
+  // of these data won't exceed the max posting list size, so there will be
+  // only a single posting list and no chain.
+ std::vector<JoinDataType> data_vec2 = CreateData(
+ /*num_data=*/12, /*start_document_id=*/data_vec1.back().document_id() + 1,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+
+ for (const JoinDataType& data : data_vec2) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ // Should be allocated to the second (new) block because the posting list
+ // should grow beyond the size that the first block maintains.
+ EXPECT_THAT(result2.id.block_index(), Eq(2));
+ EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+
+ // The posting list at result2.id should hold all of the data that have been
+ // added.
+ std::vector<JoinDataType> all_data_vec;
+ all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+ all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+ all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(result2.id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(all_data_vec.rbegin(),
+ all_data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, MultiBlockChainsBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(JoinDataType)
+ // is 14, so the max size posting list can store (4096 - 12) / 14 = 291 data.
+ // Adding 292 data should cause:
+ // - 2 max size posting lists being allocated to block 1 and block 2.
+ // - Chaining: block 2 -> block 1
+ std::vector<JoinDataType> data_vec = CreateData(
+ /*num_data=*/292, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ PostingListIdentifier second_block_id = result1.id;
+ // Should be allocated to the second block.
+ EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // We should be able to retrieve all data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_block_id));
+ // This pl_holder will only hold a posting list with the data that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<JoinDataType> second_block_data,
+ serializer_->GetData(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_data, SizeIs(Lt(data_vec.size())));
+ auto first_block_data_start = data_vec.rbegin() + second_block_data.size();
+ EXPECT_THAT(second_block_data,
+ ElementsAreArray(data_vec.rbegin(), first_block_data_start));
+
+ // Now retrieve all of the data that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(
+ serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_data_start, data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ PreexistingMultiBlockReusesBlocksProperly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(JoinDataType)
+ // is 14, so the max size posting list can store (4096 - 12) / 14 = 291 data.
+ // Adding 292 data will cause:
+ // - 2 max size posting lists being allocated to block 1 and block 2.
+ // - Chaining: block 2 -> block 1
+ std::vector<JoinDataType> data_vec1 = CreateData(
+ /*num_data=*/292, /*start_document_id=*/0,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec1) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result1.status);
+ PostingListIdentifier first_add_id = result1.id;
+ EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
+ /*block_index=*/2, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0)));
+
+ // Now add more data. These should fit on the existing second block and not
+ // fill it up.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), first_add_id));
+ std::vector<JoinDataType> data_vec2 = CreateData(
+ /*num_data=*/10, /*start_document_id=*/data_vec1.back().document_id() + 1,
+ /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+ for (const JoinDataType& data : data_vec2) {
+ ICING_ASSERT_OK(pl_accessor->PrependData(data));
+ }
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor).Finalize();
+ ICING_ASSERT_OK(result2.status);
+ PostingListIdentifier second_add_id = result2.id;
+ EXPECT_THAT(second_add_id, Eq(first_add_id));
+
+ // We should be able to retrieve all data.
+ std::vector<JoinDataType> all_data_vec;
+ all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+ all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+ all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListHolder pl_holder,
+ flash_index_storage_->GetPostingList(second_add_id));
+ // This pl_holder will only hold a posting list with the data that didn't fit
+ // on the first block.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<JoinDataType> second_block_data,
+ serializer_->GetData(&pl_holder.posting_list));
+ ASSERT_THAT(second_block_data, SizeIs(Lt(all_data_vec.size())));
+ auto first_block_data_start =
+ all_data_vec.rbegin() + second_block_data.size();
+ EXPECT_THAT(second_block_data,
+ ElementsAreArray(all_data_vec.rbegin(), first_block_data_start));
+
+ // Now retrieve all of the data that were on the first block.
+ uint32_t first_block_id = pl_holder.next_block_index;
+ EXPECT_THAT(first_block_id, Eq(1));
+
+ PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+ /*posting_list_index_bits=*/0);
+ ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+ flash_index_storage_->GetPostingList(pl_id));
+ EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+ IsOkAndHolds(ElementsAreArray(first_block_data_start,
+ all_data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ InvalidDataShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ JoinDataType invalid_data = JoinDataType::GetInvalid();
+ EXPECT_THAT(pl_accessor->PrependData(invalid_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ JoinDataNonIncreasingShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ JoinDataType data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/819));
+ ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+
+ JoinDataType data2(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/818));
+ EXPECT_THAT(pl_accessor->PrependData(data2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ JoinDataType data3(/*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId - 1,
+ /*fingerprint=*/820));
+ EXPECT_THAT(pl_accessor->PrependData(data3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ JoinDataType data4(/*document_id=*/0,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId + 1,
+ /*fingerprint=*/820));
+ EXPECT_THAT(pl_accessor->PrependData(data4),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ NewPostingListNoDataAddedShouldReturnInvalidArgument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ EXPECT_THAT(result.status,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+ PreexistingPostingListNoDataAddedShouldSucceed) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor1,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), serializer_.get()));
+ JoinDataType data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/819));
+ ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
+ PostingListAccessor::FinalizeResult result1 =
+ std::move(*pl_accessor1).Finalize();
+ ICING_ASSERT_OK(result1.status);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor2,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), serializer_.get(), result1.id));
+ PostingListAccessor::FinalizeResult result2 =
+ std::move(*pl_accessor2).Finalize();
+ EXPECT_THAT(result2.status, IsOk());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/posting-list-join-data-serializer.h b/icing/join/posting-list-join-data-serializer.h
new file mode 100644
index 0000000..9f39dca
--- /dev/null
+++ b/icing/join/posting-list-join-data-serializer.h
@@ -0,0 +1,803 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
+#define ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// A serializer class to serialize JoinDataType to PostingListUsed. Usually
+// JoinDataType is DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>,
+// DocumentIdToJoinInfo<TermId>, or DocumentIdToJoinInfo<int64_t>.
+//
+// REQUIRES:
+// - JoinDataType is comparable by operator <.
+// - JoinDataType implements is_valid() method.
+// - JoinDataType has static method GetInvalid() that returns a JoinDataType
+// instance containing invalid data.
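+//
+// A minimal sketch of a type meeting these requirements (hypothetical, for
+// illustration only; real instantiations use DocumentIdToJoinInfo):
+//
+//   struct FakeJoinData {
+//     static FakeJoinData GetInvalid() { return FakeJoinData{-1}; }
+//     bool is_valid() const { return value >= 0; }
+//     bool operator<(const FakeJoinData& other) const {
+//       return value < other.value;
+//     }
+//     int32_t value;
+//   };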
+template <typename JoinDataType>
+class PostingListJoinDataSerializer : public PostingListSerializer {
+ public:
+ using SpecialDataType = SpecialData<JoinDataType>;
+ static_assert(sizeof(SpecialDataType) == sizeof(JoinDataType), "");
+
+ static constexpr uint32_t kSpecialDataSize =
+ kNumSpecialData * sizeof(SpecialDataType);
+
+ uint32_t GetDataTypeBytes() const override { return sizeof(JoinDataType); }
+
+ uint32_t GetMinPostingListSize() const override {
+ static constexpr uint32_t kMinPostingListSize = kSpecialDataSize;
+ static_assert(sizeof(PostingListIndex) <= kMinPostingListSize,
+ "PostingListIndex must be small enough to fit in a "
+ "minimum-sized Posting List.");
+
+ return kMinPostingListSize;
+ }
+
+ uint32_t GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const override;
+
+ uint32_t GetBytesUsed(
+ const PostingListUsed* posting_list_used) const override;
+
+ void Clear(PostingListUsed* posting_list_used) const override;
+
+ libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
+ PostingListUsed* src) const override;
+
+ // Prepend a JoinData to the posting list.
+ //
+ // RETURNS:
+  //   - INVALID_ARGUMENT if !data.is_valid() or if data is smaller than the
+  //     previously added data.
+ // - RESOURCE_EXHAUSTED if there is no more room to add data to the posting
+ // list.
+ libtextclassifier3::Status PrependData(PostingListUsed* posting_list_used,
+ const JoinDataType& data) const;
+
+ // Prepend multiple JoinData to the posting list.
+ // Data should be sorted in ascending order (as defined by the less than
+  // operator for JoinData).
+ // If keep_prepended is true, whatever could be prepended is kept, otherwise
+ // the posting list is reverted and left in its original state.
+ //
+ // RETURNS:
+ // The number of data that have been prepended to the posting list. If
+ // keep_prepended is false and reverted, then it returns 0.
+ libtextclassifier3::StatusOr<uint32_t> PrependDataArray(
+ PostingListUsed* posting_list_used, const JoinDataType* array,
+ uint32_t num_data, bool keep_prepended) const;
+
+ // Retrieves all data stored in the posting list.
+ //
+ // RETURNS:
+ // - On success, a vector of JoinDataType sorted by the reverse order of
+ // prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetData(
+ const PostingListUsed* posting_list_used) const;
+
+ // Same as GetData but appends data to data_arr_out.
+ //
+ // RETURNS:
+  //   - OK on success; the retrieved JoinDataType are appended to
+  //     data_arr_out in the reverse order of prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetData(
+ const PostingListUsed* posting_list_used,
+ std::vector<JoinDataType>* data_arr_out) const;
+
+ // Undo the last num_data data prepended. If num_data > number of data, then
+ // we clear all data.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status PopFrontData(PostingListUsed* posting_list_used,
+ uint32_t num_data) const;
+
+ // Helper function to determine if posting list is full.
+ bool IsFull(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ private:
+ // In PostingListJoinDataSerializer, there is no compression, but we still use
+ // the traditional posting list implementation.
+ //
+ // Posting list layout formats:
+ //
+ // NOT_FULL
+ // +-special-data-0--+-special-data-1--+------------+-----------------------+
+ // | | | | |
+ // |data-start-offset| Data::Invalid | 0x00000000 | (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+------------+-----------------------+
+ //
+ // ALMOST_FULL
+ // +-special-data-0--+-special-data-1--+-----+------------------------------+
+ // | | | | |
+ // | Data::Invalid | 1st data |(pad)| (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+-----+------------------------------+
+ //
+ // FULL
+ // +-special-data-0--+-special-data-1--+-----+------------------------------+
+ // | | | | |
+ // | 1st data | 2nd data |(pad)| (compressed) data |
+ // | | | | |
+ // +-----------------+-----------------+-----+------------------------------+
+ //
+ // The first two uncompressed (special) data also implicitly encode
+ // information about the size of the compressed data region.
+ //
+ // 1. If the posting list is NOT_FULL, then special_data_0 contains the byte
+ // offset of the start of the compressed data. Thus, the size of the
+ // compressed data is
+ // posting_list_used->size_in_bytes() - special_data_0.data_start_offset().
+ //
+ // 2. If posting list is ALMOST_FULL or FULL, then the compressed data region
+ // starts somewhere between
+ // [kSpecialDataSize, kSpecialDataSize + sizeof(JoinDataType) - 1] and ends
+ // at posting_list_used->size_in_bytes() - 1.
+ //
+ // EXAMPLE
+ // JoinDataType = DocumentIdToJoinInfo<int64_t>. Posting list size: 48 bytes
+ //
+ // EMPTY!
+ // +-- byte 0-11 --+---- 12-23 ----+------------ 24-47 -------------+
+ // | | | |
+ // | 48 | Data::Invalid | 0x00000000 |
+ // | | | |
+ // +---------------+---------------+--------------------------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 12, JoinInteger = 5)
+ // NOT FULL!
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | | | | 12 |
+ // | 36 | Data::Invalid | 0x00000000 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 18, JoinInteger = -2)
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | | | 18 | 12 |
+ // | 24 | Data::Invalid | -2 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 22, JoinInteger = 3)
+ // ALMOST_FULL!
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | | 22 | 18 | 12 |
+ // | Data::Invalid | 3 | -2 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+ //
+ // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 27, JoinInteger = 0)
+ // FULL!
+ // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+ // | 27 | 22 | 18 | 12 |
+ // | 0 | 3 | -2 | 5 |
+ // | | | | |
+ // +---------------+---------------+---------------+---------------+
+
+ // Helpers to determine what state the posting list is in.
+ bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
+ return !GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+ GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ bool IsEmpty(const PostingListUsed* posting_list_used) const {
+ return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset() ==
+ posting_list_used->size_in_bytes() &&
+ !GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+ }
+
+ // Returns false if both special data are invalid or if data start offset
+ // stored in the special data is less than kSpecialDataSize or greater than
+  // posting_list_used->size_in_bytes(). Returns true otherwise.
+ bool IsPostingListValid(const PostingListUsed* posting_list_used) const;
+
+ // Prepend data to a posting list that is in the ALMOST_FULL state.
+ //
+ // RETURNS:
+ // - OK, if successful
+  //   - INVALID_ARGUMENT if data is smaller than the most recently added
+  //     data.
+ libtextclassifier3::Status PrependDataToAlmostFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const;
+
+ // Prepend data to a posting list that is in the EMPTY state. This will always
+ // succeed because there are no pre-existing data and no validly constructed
+ // posting list could fail to fit one data.
+ void PrependDataToEmpty(PostingListUsed* posting_list_used,
+ const JoinDataType& data) const;
+
+ // Prepend data to a posting list that is in the NOT_FULL state.
+ //
+ // RETURNS:
+ // - OK, if successful
+  //   - INVALID_ARGUMENT if data is smaller than the most recently added
+  //     data.
+ libtextclassifier3::Status PrependDataToNotFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const;
+
+ // Returns either 0 (FULL state), sizeof(JoinDataType) (ALMOST_FULL state) or
+ // a byte offset between kSpecialDataSize and
+ // posting_list_used->size_in_bytes() (inclusive) (NOT_FULL state).
+ uint32_t GetStartByteOffset(const PostingListUsed* posting_list_used) const;
+
+ // Sets special data 0 to properly reflect what start byte offset is (see
+ // layout comment for further details).
+ //
+  // Returns false if offset > posting_list_used->size_in_bytes(), or if
+  // offset falls in the invalid open range (sizeof(JoinDataType),
+  // kSpecialDataSize) or (0, sizeof(JoinDataType)). Returns true otherwise.
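+  // Concretely (an illustrative example): with sizeof(JoinDataType) == 14
+  // and thus kSpecialDataSize == 28, the only valid offsets are 0 (FULL), 14
+  // (ALMOST_FULL), and any value in [28, size_in_bytes()] (NOT_FULL).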
+ bool SetStartByteOffset(PostingListUsed* posting_list_used,
+ uint32_t offset) const;
+
+ // Helper for MoveFrom/GetData/PopFrontData. Adds limit number of data to out
+ // or all data in the posting list if the posting list contains less than
+ // limit number of data. out can be NULL.
+ //
+ // NOTE: If called with limit=1, pop=true on a posting list that transitioned
+ // from NOT_FULL directly to FULL, GetDataInternal will not return the posting
+ // list to NOT_FULL. Instead it will leave it in a valid state, but it will be
+ // ALMOST_FULL.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetDataInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<JoinDataType>* out) const;
+
+ // Retrieves the value stored in the index-th special data.
+ //
+ // REQUIRES:
+ // 0 <= index < kNumSpecialData.
+ //
+ // RETURNS:
+ // - A valid SpecialData<JoinDataType>.
+ SpecialDataType GetSpecialData(const PostingListUsed* posting_list_used,
+ uint32_t index) const;
+
+ // Sets the value stored in the index-th special data to special_data.
+ //
+ // REQUIRES:
+ // 0 <= index < kNumSpecialData.
+ void SetSpecialData(PostingListUsed* posting_list_used, uint32_t index,
+ const SpecialDataType& special_data) const;
+
+ // Prepends data to the memory region
+ // [offset - sizeof(JoinDataType), offset - 1] and
+ // returns the new beginning of the region.
+ //
+ // RETURNS:
+ // - The new beginning of the padded region, if successful.
+ // - INVALID_ARGUMENT if data will not fit (uncompressed) between
+ // [kSpecialDataSize, offset - 1]
+ libtextclassifier3::StatusOr<uint32_t> PrependDataUncompressed(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const;
+};
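+
+// A brief usage sketch (illustrative only): operating on a PostingListUsed
+// directly with this serializer. `pl_used` is assumed to be a PostingListUsed
+// created over a properly sized region, and data1 < data2.
+//
+//   PostingListJoinDataSerializer<JoinDataType> serializer;
+//   ICING_RETURN_IF_ERROR(serializer.PrependData(&pl_used, data1));
+//   ICING_RETURN_IF_ERROR(serializer.PrependData(&pl_used, data2));
+//   // GetData returns data in the reverse order of prepending:
+//   // {data2, data1}.
+//   ICING_ASSIGN_OR_RETURN(std::vector<JoinDataType> all,
+//                          serializer.GetData(&pl_used));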
+
+template <typename JoinDataType>
+uint32_t PostingListJoinDataSerializer<JoinDataType>::GetBytesUsed(
+ const PostingListUsed* posting_list_used) const {
+ // The special data will be included if they represent actual data. If they
+ // represent the data start offset or the invalid data sentinel, they are not
+ // included.
+ return posting_list_used->size_in_bytes() -
+ GetStartByteOffset(posting_list_used);
+}
+
+template <typename JoinDataType>
+uint32_t
+PostingListJoinDataSerializer<JoinDataType>::GetMinPostingListSizeToFit(
+ const PostingListUsed* posting_list_used) const {
+ if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
+ // If in either the FULL state or ALMOST_FULL state, this posting list *is*
+ // the minimum size posting list that can fit these data. So just return the
+ // size of the posting list.
+ return posting_list_used->size_in_bytes();
+ }
+
+  // In the NOT_FULL state, all n data live in the data region, so BytesUsed
+  // includes no special data. The minimum sized posting list guaranteed to
+  // fit these data would be ALMOST_FULL, with kInvalidData in special data 0,
+  // one data in special data 1, and the remaining n - 1 data in the data
+  // region. That layout needs room for n + 1 data slots in total, i.e.
+  // BytesUsed plus one extra data.
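+  // For example (using the sizes from the accessor tests): with
+  // sizeof(JoinDataType) == 14 and 3 data in the NOT_FULL state,
+  // GetBytesUsed() == 42 and the minimum posting list that fits them is
+  // 42 + 14 = 56 bytes.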
+ return GetBytesUsed(posting_list_used) + GetDataTypeBytes();
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::Clear(
+ PostingListUsed* posting_list_used) const {
+ // Safe to ignore return value because posting_list_used->size_in_bytes() is
+ // a valid argument.
+ SetStartByteOffset(posting_list_used,
+ /*offset=*/posting_list_used->size_in_bytes());
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::MoveFrom(
+ PostingListUsed* dst, PostingListUsed* src) const {
+ ICING_RETURN_ERROR_IF_NULL(dst);
+ ICING_RETURN_ERROR_IF_NULL(src);
+ if (GetMinPostingListSizeToFit(src) > dst->size_in_bytes()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "src MinPostingListSizeToFit %d must be larger than size %d.",
+ GetMinPostingListSizeToFit(src), dst->size_in_bytes()));
+ }
+
+ if (!IsPostingListValid(dst)) {
+ return absl_ports::FailedPreconditionError(
+ "Dst posting list is in an invalid state and can't be used!");
+ }
+ if (!IsPostingListValid(src)) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot MoveFrom an invalid src posting list!");
+ }
+
+ // Pop just enough data that all of src's compressed data fit in
+ // dst posting_list's compressed area. Then we can memcpy that area.
+ std::vector<JoinDataType> data_arr;
+ while (IsFull(src) || IsAlmostFull(src) ||
+ (dst->size_in_bytes() - kSpecialDataSize < GetBytesUsed(src))) {
+ if (!GetDataInternal(src, /*limit=*/1, /*pop=*/true, &data_arr).ok()) {
+ return absl_ports::AbortedError(
+ "Unable to retrieve data from src posting list.");
+ }
+ }
+
+ // memcpy the area and set up start byte offset.
+ Clear(dst);
+ memcpy(dst->posting_list_buffer() + dst->size_in_bytes() - GetBytesUsed(src),
+ src->posting_list_buffer() + GetStartByteOffset(src),
+ GetBytesUsed(src));
+  // We popped all of src's data outside of the compressed area and
+  // guaranteed that GetBytesUsed(src) fits within dst->size_in_bytes() -
+  // kSpecialDataSize, so this is a valid byte offset for the NOT_FULL state
+  // and ignoring the return value is safe.
+ SetStartByteOffset(dst, dst->size_in_bytes() - GetBytesUsed(src));
+
+ // Put back remaining data.
+ for (auto riter = data_arr.rbegin(); riter != data_arr.rend(); ++riter) {
+ // PrependData may return:
+    // - INVALID_ARGUMENT: if data is invalid or smaller than the previous data
+ // - RESOURCE_EXHAUSTED
+ // RESOURCE_EXHAUSTED should be impossible because we've already assured
+ // that there is enough room above.
+ ICING_RETURN_IF_ERROR(PrependData(dst, *riter));
+ }
+
+ Clear(src);
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependDataToAlmostFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const {
+ SpecialDataType special_data = GetSpecialData(posting_list_used, /*index=*/1);
+ if (data < special_data.data()) {
+ return absl_ports::InvalidArgumentError(
+ "JoinData being prepended must not be smaller than the most recent "
+ "JoinData");
+ }
+
+  // Without compression, prepending a new data into an ALMOST_FULL posting
+  // list will transition it to the FULL state. Therefore, set special data 0
+  // directly.
+ SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(data));
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::PrependDataToEmpty(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const {
+ // First data to be added. Just add verbatim, no compression.
+ if (posting_list_used->size_in_bytes() == kSpecialDataSize) {
+ // First data will be stored at special data 1.
+ // Safe to ignore the return value because 1 < kNumSpecialData
+ SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+ // Safe to ignore the return value because sizeof(JoinDataType) is a valid
+ // argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(JoinDataType));
+ } else {
+ // Since this is the first data, size != kSpecialDataSize and
+ // size % sizeof(JoinDataType) == 0, we know that there is room to fit
+ // 'data' into the compressed region, so ValueOrDie is safe.
+ uint32_t offset =
+ PrependDataUncompressed(posting_list_used, data,
+ /*offset=*/posting_list_used->size_in_bytes())
+ .ValueOrDie();
+ // Safe to ignore the return value because PrependDataUncompressed is
+ // guaranteed to return a valid offset.
+ SetStartByteOffset(posting_list_used, offset);
+ }
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependDataToNotFull(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const {
+ JoinDataType curr = JoinDataType::GetInvalid();
+ memcpy(&curr, posting_list_used->posting_list_buffer() + offset,
+ sizeof(JoinDataType));
+ if (data < curr) {
+ return absl_ports::InvalidArgumentError(
+ "JoinData being prepended must not be smaller than the most recent "
+ "JoinData");
+ }
+
+ if (offset >= kSpecialDataSize + sizeof(JoinDataType)) {
+ offset =
+ PrependDataUncompressed(posting_list_used, data, offset).ValueOrDie();
+ SetStartByteOffset(posting_list_used, offset);
+ } else {
+ // The new data must be put in special data 1.
+ SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+ // State ALMOST_FULL. Safe to ignore the return value because
+ // sizeof(JoinDataType) is a valid argument.
+ SetStartByteOffset(posting_list_used, /*offset=*/sizeof(JoinDataType));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependData(
+ PostingListUsed* posting_list_used, const JoinDataType& data) const {
+ if (!data.is_valid()) {
+ return absl_ports::InvalidArgumentError("Cannot prepend an invalid data!");
+ }
+ if (!IsPostingListValid(posting_list_used)) {
+ return absl_ports::FailedPreconditionError(
+ "This PostingListUsed is in an invalid state and can't add any data!");
+ }
+
+ if (IsFull(posting_list_used)) {
+ // State FULL: no space left.
+ return absl_ports::ResourceExhaustedError("No more room for data");
+ } else if (IsAlmostFull(posting_list_used)) {
+ return PrependDataToAlmostFull(posting_list_used, data);
+ } else if (IsEmpty(posting_list_used)) {
+ PrependDataToEmpty(posting_list_used, data);
+ return libtextclassifier3::Status::OK;
+ } else {
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ return PrependDataToNotFull(posting_list_used, data, offset);
+ }
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListJoinDataSerializer<JoinDataType>::PrependDataArray(
+ PostingListUsed* posting_list_used, const JoinDataType* array,
+ uint32_t num_data, bool keep_prepended) const {
+ if (!IsPostingListValid(posting_list_used)) {
+ return 0;
+ }
+
+ uint32_t i;
+ for (i = 0; i < num_data; ++i) {
+ if (!PrependData(posting_list_used, array[i]).ok()) {
+ break;
+ }
+ }
+ if (i != num_data && !keep_prepended) {
+ // Didn't fit. Undo everything and check that we have the same offset as
+ // before. PopFrontData guarantees that it will remove all 'i' data so long
+ // as there are at least 'i' data in the posting list, which we know there
+ // are.
+ ICING_RETURN_IF_ERROR(PopFrontData(posting_list_used, /*num_data=*/i));
+ return 0;
+ }
+ return i;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<std::vector<JoinDataType>>
+PostingListJoinDataSerializer<JoinDataType>::GetData(
+ const PostingListUsed* posting_list_used) const {
+ std::vector<JoinDataType> data_arr_out;
+ ICING_RETURN_IF_ERROR(GetData(posting_list_used, &data_arr_out));
+ return data_arr_out;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status PostingListJoinDataSerializer<JoinDataType>::GetData(
+ const PostingListUsed* posting_list_used,
+ std::vector<JoinDataType>* data_arr_out) const {
+ return GetDataInternal(posting_list_used,
+ /*limit=*/std::numeric_limits<uint32_t>::max(),
+ /*pop=*/false, data_arr_out);
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PopFrontData(
+ PostingListUsed* posting_list_used, uint32_t num_data) const {
+ if (num_data == 1 && IsFull(posting_list_used)) {
+    // The PL is in the FULL state, which means that we store 2 uncompressed
+    // data in the 2 special positions. But the FULL state may be reached from
+    // 2 different states.
+ // (1) In ALMOST_FULL state
+ // +------------------+-----------------+-----+---------------------------+
+ // |Data::Invalid |1st data |(pad)|(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+-----+---------------------------+
+ // When we prepend another data, we can only put it at special data 0, and
+ // thus get a FULL PL
+ // +------------------+-----------------+-----+---------------------------+
+ // |new 1st data |original 1st data|(pad)|(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+-----+---------------------------+
+ //
+ // (2) In NOT_FULL state
+ // +------------------+-----------------+-------+---------+---------------+
+ // |data-start-offset |Data::Invalid |(pad) |1st data |(compressed) |
+ // | | | | |data |
+ // +------------------+-----------------+-------+---------+---------------+
+ // When we prepend another data, we can reach any of the 3 following
+ // scenarios:
+ // (2.1) NOT_FULL
+    // This happens if the space of the pad and the original 1st data can
+    // accommodate the new 1st data and the encoded delta value.
+ // +------------------+-----------------+-----+--------+------------------+
+ // |data-start-offset |Data::Invalid |(pad)|new |(compressed) data |
+ // | | | |1st data| |
+ // +------------------+-----------------+-----+--------+------------------+
+ // (2.2) ALMOST_FULL
+    // If the space of the pad and the original 1st data cannot accommodate
+    // the new 1st data and the encoded delta value, but can accommodate the
+    // encoded delta value alone, we can put the new 1st data at special
+    // position 1.
+ // +------------------+-----------------+---------+-----------------------+
+ // |Data::Invalid |new 1st data |(pad) |(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+---------+-----------------------+
+ // (2.3) FULL
+    // In a very rare case, it cannot accommodate even the encoded delta value
+    // alone. We can move the original 1st data into special position 1 and
+    // the new 1st data into special position 0. This may happen because we
+    // use the VarInt encoding method, which may make the encoded value longer
+    // (about 4/3 times the original).
+ // +------------------+-----------------+--------------+------------------+
+ // |new 1st data |original 1st data|(pad) |(compressed) data |
+ // | | | | |
+ // +------------------+-----------------+--------------+------------------+
+ //
+    // Suppose the PL is now in the FULL state, but we don't know whether it
+    // arrived at this state from NOT_FULL (like (2.3)) or from ALMOST_FULL
+    // (like (1)). If we simply pop the new 1st data, we'll always return to
+    // the ALMOST_FULL state like (1), but we want to make the prepending
+    // operation "reversible", so there should be some way to return to
+    // NOT_FULL when that was the original state. A simple way to do this is:
+ // - Pop 2 data out of the PL to state ALMOST_FULL or NOT_FULL.
+ // - Add the second data ("original 1st data") back.
+ //
+ // Then we can return to the correct original states of (2.1) or (1). This
+ // makes our prepending operation reversible.
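+    //
+    // As a concrete (illustrative) example, suppose data A < B < C were
+    // prepended in that order, and prepending C moved the PL into the FULL
+    // state. Popping C alone cannot tell us whether the pre-C state was (1)
+    // or (2.3). Instead, we pop both C and B to reach an unambiguous
+    // NOT_FULL/ALMOST_FULL state, and then prepend B again; since prepending
+    // is deterministic, this restores the exact state the PL was in before C
+    // was prepended.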
+ std::vector<JoinDataType> out;
+
+ // Popping 2 data should never fail because we've just ensured that the
+ // posting list is in the FULL state.
+ ICING_RETURN_IF_ERROR(
+ GetDataInternal(posting_list_used, /*limit=*/2, /*pop=*/true, &out));
+
+ // PrependData should never fail because:
+ // - out[1] is a valid data less than all previous data in the posting list.
+ // - There's no way that the posting list could run out of room because it
+ // previously stored these 2 data.
+ ICING_RETURN_IF_ERROR(PrependData(posting_list_used, out[1]));
+ } else if (num_data > 0) {
+ return GetDataInternal(posting_list_used, /*limit=*/num_data, /*pop=*/true,
+ /*out=*/nullptr);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::GetDataInternal(
+ const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+ std::vector<JoinDataType>* out) const {
+ uint32_t offset = GetStartByteOffset(posting_list_used);
+ uint32_t count = 0;
+
+  // First, traverse the two special positions.
+ while (count < limit && offset < kSpecialDataSize) {
+ // offset / sizeof(JoinDataType) < kNumSpecialData
+ // because of the check above.
+ SpecialDataType special_data = GetSpecialData(
+ posting_list_used, /*index=*/offset / sizeof(JoinDataType));
+ if (out != nullptr) {
+ out->push_back(special_data.data());
+ }
+ offset += sizeof(JoinDataType);
+ ++count;
+ }
+
+  // - We don't compress the data.
+  // - The posting list size is a multiple of the data type's size in bytes.
+  // So the offset of the first non-special data is guaranteed to be at
+  // kSpecialDataSize when in the ALMOST_FULL or FULL state. In fact, we must
+  // not apply any padding-skipping logic here, because the data are stored
+  // uncompressed and 0 bytes are meaningful (e.g. a document id byte may
+  // legitimately be 0).
+ while (count < limit && offset < posting_list_used->size_in_bytes()) {
+ JoinDataType data = JoinDataType::GetInvalid();
+ memcpy(&data, posting_list_used->posting_list_buffer() + offset,
+ sizeof(JoinDataType));
+ offset += sizeof(JoinDataType);
+ if (out != nullptr) {
+ out->push_back(data);
+ }
+ ++count;
+ }
+
+ if (pop) {
+ PostingListUsed* mutable_posting_list_used =
+ const_cast<PostingListUsed*>(posting_list_used);
+ // Modify the posting list so that we pop all data actually traversed.
+ if (offset >= kSpecialDataSize &&
+ offset < posting_list_used->size_in_bytes()) {
+ memset(
+ mutable_posting_list_used->posting_list_buffer() + kSpecialDataSize,
+ 0, offset - kSpecialDataSize);
+ }
+ SetStartByteOffset(mutable_posting_list_used, offset);
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+typename PostingListJoinDataSerializer<JoinDataType>::SpecialDataType
+PostingListJoinDataSerializer<JoinDataType>::GetSpecialData(
+ const PostingListUsed* posting_list_used, uint32_t index) const {
+ // It is ok to temporarily construct a SpecialData with offset = 0 since we're
+ // going to overwrite it by memcpy.
+ SpecialDataType special_data(0);
+ memcpy(&special_data,
+ posting_list_used->posting_list_buffer() +
+ index * sizeof(SpecialDataType),
+ sizeof(SpecialDataType));
+ return special_data;
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::SetSpecialData(
+ PostingListUsed* posting_list_used, uint32_t index,
+ const SpecialDataType& special_data) const {
+ memcpy(posting_list_used->posting_list_buffer() +
+ index * sizeof(SpecialDataType),
+ &special_data, sizeof(SpecialDataType));
+}
+
+template <typename JoinDataType>
+bool PostingListJoinDataSerializer<JoinDataType>::IsPostingListValid(
+ const PostingListUsed* posting_list_used) const {
+ if (IsAlmostFull(posting_list_used)) {
+ // Special data 1 should hold a valid data.
+ if (!GetSpecialData(posting_list_used, /*index=*/1).data().is_valid()) {
+ ICING_LOG(ERROR)
+ << "Both special data cannot be invalid at the same time.";
+ return false;
+ }
+ } else if (!IsFull(posting_list_used)) {
+ // NOT_FULL. Special data 0 should hold a valid offset.
+ SpecialDataType special_data =
+ GetSpecialData(posting_list_used, /*index=*/0);
+ if (special_data.data_start_offset() > posting_list_used->size_in_bytes() ||
+ special_data.data_start_offset() < kSpecialDataSize) {
+ ICING_LOG(ERROR) << "Offset: " << special_data.data_start_offset()
+ << " size: " << posting_list_used->size_in_bytes()
+ << " sp size: " << kSpecialDataSize;
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename JoinDataType>
+uint32_t PostingListJoinDataSerializer<JoinDataType>::GetStartByteOffset(
+ const PostingListUsed* posting_list_used) const {
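+  // Note: the start byte offset doubles as the state encoding. In the FULL
+  // state, both special positions hold data, so valid data starts at offset
+  // 0. In the ALMOST_FULL state, only special position 1 holds data, so valid
+  // data starts at sizeof(JoinDataType). Otherwise (NOT_FULL), special
+  // position 0 explicitly stores the data start offset.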
+ if (IsFull(posting_list_used)) {
+ return 0;
+ } else if (IsAlmostFull(posting_list_used)) {
+ return sizeof(JoinDataType);
+ } else {
+ return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset();
+ }
+}
+
+template <typename JoinDataType>
+bool PostingListJoinDataSerializer<JoinDataType>::SetStartByteOffset(
+ PostingListUsed* posting_list_used, uint32_t offset) const {
+ if (offset > posting_list_used->size_in_bytes()) {
+ ICING_LOG(ERROR) << "offset cannot be a value greater than size "
+ << posting_list_used->size_in_bytes() << ". offset is "
+ << offset << ".";
+ return false;
+ }
+ if (offset < kSpecialDataSize && offset > sizeof(JoinDataType)) {
+ ICING_LOG(ERROR) << "offset cannot be a value between ("
+ << sizeof(JoinDataType) << ", " << kSpecialDataSize
+ << "). offset is " << offset << ".";
+ return false;
+ }
+ if (offset < sizeof(JoinDataType) && offset != 0) {
+ ICING_LOG(ERROR) << "offset cannot be a value between (0, "
+ << sizeof(JoinDataType) << "). offset is " << offset
+ << ".";
+ return false;
+ }
+
+ if (offset >= kSpecialDataSize) {
+ // NOT_FULL state.
+ SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(offset));
+ SetSpecialData(posting_list_used, /*index=*/1,
+ SpecialDataType(JoinDataType::GetInvalid()));
+ } else if (offset == sizeof(JoinDataType)) {
+ // ALMOST_FULL state.
+ SetSpecialData(posting_list_used, /*index=*/0,
+ SpecialDataType(JoinDataType::GetInvalid()));
+ }
+  // Nothing to do for the FULL state - the offset isn't actually stored
+  // anywhere and both special data hold valid data.
+ return true;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListJoinDataSerializer<JoinDataType>::PrependDataUncompressed(
+ PostingListUsed* posting_list_used, const JoinDataType& data,
+ uint32_t offset) const {
+ if (offset < kSpecialDataSize + sizeof(JoinDataType)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Not enough room to prepend JoinData at offset %u.", offset));
+ }
+ offset -= sizeof(JoinDataType);
+ memcpy(posting_list_used->posting_list_buffer() + offset, &data,
+ sizeof(JoinDataType));
+ return offset;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
diff --git a/icing/join/posting-list-join-data-serializer_test.cc b/icing/join/posting-list-join-data-serializer_test.cc
new file mode 100644
index 0000000..20137b6
--- /dev/null
+++ b/icing/join/posting-list-join-data-serializer_test.cc
@@ -0,0 +1,653 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/posting-list-join-data-serializer.h"
+
+#include <algorithm>
+#include <iterator>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/testing/common-matchers.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::Eq;
+using testing::IsEmpty;
+using testing::SizeIs;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitNotNull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size =
+ 2551 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2))),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetMinPostingListSizeToFit(&pl_used),
+ Eq(2 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetMinPostingListSizeToFit(&pl_used),
+ Eq(3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+}
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitAlmostFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2))),
+ IsOk());
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2))),
+ IsOk());
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))),
+ IsOk());
+ ASSERT_THAT(
+ serializer.PrependData(
+ &pl_used,
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10))),
+ IsOk());
+ EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataNotFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size =
+ 2551 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // Prepend some data so that the posting list is used.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data0(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ // Size = sizeof(uncompressed data0)
+ int expected_size =
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ // Size = sizeof(uncompressed data1)
+ // + sizeof(uncompressed data0)
+ expected_size += sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data2(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+ // Size = sizeof(uncompressed data2)
+ // + sizeof(uncompressed data1)
+ // + sizeof(uncompressed data0)
+ expected_size += sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data2, data1, data0)));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataAlmostFull) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 4 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // Fill up the compressed region.
+ // Transitions:
+ // Adding data0: EMPTY -> NOT_FULL
+ // Adding data1: NOT_FULL -> NOT_FULL
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data0(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ int expected_size =
+ 2 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ // Add one more data to transition NOT_FULL -> ALMOST_FULL
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data2(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+ expected_size =
+ 3 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data2, data1, data0)));
+
+ // Add one more data to transition ALMOST_FULL -> FULL
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data3(
+ /*document_id=*/3, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/0));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data3), IsOk());
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data3, data2, data1, data0)));
+
+ // The posting list is FULL. Adding another data should fail.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data4(
+ /*document_id=*/4, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/1234));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data4),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependSmallerDataShouldFail) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 4 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data(
+ /*document_id=*/100,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> smaller_data(
+ /*document_id=*/99,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+
+ // NOT_FULL -> NOT_FULL
+ ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // NOT_FULL -> ALMOST_FULL
+ ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // ALMOST_FULL -> FULL
+ ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+ EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataPostingListUsedMinSize) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ // PL State: EMPTY
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(IsEmpty()));
+
+ // Add a data. PL should shift to ALMOST_FULL state
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data0(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+ // Size = sizeof(uncompressed data0)
+ int expected_size =
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+ // Add another data. PL should shift to FULL state.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data1(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+ // Size = sizeof(uncompressed data1) + sizeof(uncompressed data0)
+ expected_size += sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+ EXPECT_THAT(serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAre(data1, data0)));
+
+ // The posting list is FULL. Adding another data should fail.
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> data2(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10));
+ EXPECT_THAT(serializer.PrependData(&pl_used, data2),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataArrayDoNotKeepPrepended) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 6 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_in;
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_pushed;
+
+ // Add 3 data. The PL is in the empty state and should be able to fit all 3
+ // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 2 data. The PL should transition from NOT_FULL to ALMOST_FULL.
+ data_in.clear();
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4, NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 2 data. The PL should remain ALMOST_FULL since the remaining space can
+ // only fit 1 data.
+ data_in.clear();
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, NamespaceFingerprintIdentifier(/*namespace_id=*/2,
+ /*fingerprint=*/99)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/63)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(0));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 1 data. The PL should transition from ALMOST_FULL to FULL.
+ data_in.pop_back();
+ ASSERT_THAT(data_in, SizeIs(1));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataArrayKeepPrepended) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 6 * sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_in;
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_pushed;
+
+ // Add 3 data. The PL is in the empty state and should be able to fit all 3
+ // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/true),
+ IsOkAndHolds(data_in.size()));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+ // Add 4 data. The PL should prepend 3 data and transition from NOT_FULL to
+ // FULL.
+ data_in.clear();
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4, NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, NamespaceFingerprintIdentifier(/*namespace_id=*/2,
+ /*fingerprint=*/99)));
+ data_in.push_back(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/63)));
+ EXPECT_THAT(
+ serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+ /*keep_prepended=*/true),
+ IsOkAndHolds(3));
+ data_in.pop_back();
+ ASSERT_THAT(data_in, SizeIs(3));
+ std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+ EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+ Eq(data_pushed.size() *
+ sizeof(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>)));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveFrom) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr1 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr1.size()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr2 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/0)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/2,
+ /*fingerprint=*/99))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr2.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ IsOk());
+ EXPECT_THAT(
+ serializer.GetData(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+ EXPECT_THAT(serializer.GetData(&pl_used1), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveToNullReturnsFailedPrecondition) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr = {
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used, /*src=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/nullptr, /*src=*/&pl_used),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveToPostingListTooSmall) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size1 = 3 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size1));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr1 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/0)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/4,
+ NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+ /*fingerprint=*/1234))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr1.size()));
+
+ int size2 = serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size2));
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr2 =
+ {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/99))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr2.size()));
+
+ EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used1),
+ IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+ EXPECT_THAT(
+ serializer.GetData(&pl_used2),
+ IsOkAndHolds(ElementsAreArray(data_arr2.rbegin(), data_arr2.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, PopFrontData) {
+ PostingListJoinDataSerializer<
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+ serializer;
+
+ int size = 2 * serializer.GetMinPostingListSize();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+ std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr = {
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/2)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/5)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/10))};
+ ASSERT_THAT(
+ serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+ /*keep_prepended=*/false),
+ IsOkAndHolds(data_arr.size()));
+ ASSERT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+  // Now, pop the most recently prepended data. The posting list should
+  // contain the first two data.
+ EXPECT_THAT(serializer.PopFrontData(&pl_used, /*num_data=*/1), IsOk());
+ data_arr.pop_back();
+ EXPECT_THAT(
+ serializer.GetData(&pl_used),
+ IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index-impl-v1.cc b/icing/join/qualified-id-join-index-impl-v1.cc
new file mode 100644
index 0000000..cdcb5a9
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v1.cc
@@ -0,0 +1,476 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/store/document-id.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Set the max # of qualified id entries to 1M and the average key-value pair
+// size to 10 bytes (i.e. ~10 MiB of raw key-value data). With these values,
+// the persistent hash map will take at most 23 MiB of disk space and mmap
+// region.
+static constexpr int32_t kDocJoinInfoMapperMaxNumEntries = 1 << 20;
+static constexpr int32_t kDocJoinInfoMapperAverageKVByteSize = 10;
+
+static constexpr int32_t kDocJoinInfoMapperDynamicTrieMaxSize =
+ 128 * 1024 * 1024; // 128 MiB
+
+DocumentId GetNewDocumentId(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId old_document_id) {
+ if (old_document_id >= document_id_old_to_new.size()) {
+ return kInvalidDocumentId;
+ }
+ return document_id_old_to_new[old_document_id];
+}
+
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/metadata");
+}
+
+std::string GetDocJoinInfoMapperPath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/doc_join_info_mapper");
+}
+
+std::string GetQualifiedIdStoragePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/qualified_id_storage");
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::Create(const Filesystem& filesystem,
+ std::string working_path,
+ bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.DirectoryExists(
+ GetDocJoinInfoMapperPath(working_path).c_str()) ||
+ !filesystem.FileExists(GetQualifiedIdStoragePath(working_path).c_str())) {
+ // Discard working_path if any file/directory is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv, use_persistent_hash_map);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv, use_persistent_hash_map);
+}
+
+QualifiedIdJoinIndexImplV1::~QualifiedIdJoinIndexImplV1() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING) << "Failed to persist qualified id type joinable index "
+ "to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Put(
+ const DocJoinInfo& doc_join_info, std::string_view ref_qualified_id_str) {
+ SetDirty();
+
+ if (!doc_join_info.is_valid()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot put data for an invalid DocJoinInfo");
+ }
+
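+  // qualified_id_storage_ is a flat char vector holding all referenced
+  // qualified id strings back to back, each terminated by '\0' (which is why
+  // ref_qualified_id_str must not contain '\0'). Append the new string at the
+  // end and record its start index in doc_join_info_mapper_.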
+ int32_t qualified_id_index = qualified_id_storage_->num_elements();
+ ICING_ASSIGN_OR_RETURN(
+ FileBackedVector<char>::MutableArrayView mutable_arr,
+ qualified_id_storage_->Allocate(ref_qualified_id_str.size() + 1));
+ mutable_arr.SetArray(/*idx=*/0, ref_qualified_id_str.data(),
+ ref_qualified_id_str.size());
+ mutable_arr.SetArray(/*idx=*/ref_qualified_id_str.size(), /*arr=*/"\0",
+ /*arr_len=*/1);
+
+ ICING_RETURN_IF_ERROR(doc_join_info_mapper_->Put(
+ encode_util::EncodeIntToCString(doc_join_info.value()),
+ qualified_id_index));
+
+ // TODO(b/268521214): add data into delete propagation storage
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndexImplV1::Get(
+ const DocJoinInfo& doc_join_info) const {
+ if (!doc_join_info.is_valid()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot get data for an invalid DocJoinInfo");
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ int32_t qualified_id_index,
+ doc_join_info_mapper_->Get(
+ encode_util::EncodeIntToCString(doc_join_info.value())));
+
+ const char* data = qualified_id_storage_->array() + qualified_id_index;
+ return std::string_view(data, strlen(data));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new qualified id type "
+ "joinable index");
+ }
+
+ {
+    // Transfer all data from the current index to the new qualified id type
+    // joinable index. Also PersistToDisk and destruct the new instance after
+    // finishing, so that we can safely swap directories later.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> new_index,
+ Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_,
+ use_persistent_hash_map_));
+ ICING_RETURN_IF_ERROR(
+ TransferIndex(document_id_old_to_new, new_index.get()));
+ new_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_index->PersistToDisk());
+ }
+
+ // Destruct current index's storage instances to safely swap directories.
+ // TODO(b/268521214): handle delete propagation storage
+ doc_join_info_mapper_.reset();
+ qualified_id_storage_.reset();
+
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new qualified id type joinable index due to failed "
+ "swap");
+ }
+
+ // Reinitialize qualified id type joinable index.
+ if (!filesystem_.PRead(GetMetadataFilePath(working_path_).c_str(),
+ metadata_buffer_.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+    return absl_ports::InternalError("Failed to read metadata file");
+ }
+ if (use_persistent_hash_map_) {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper_,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, GetDocJoinInfoMapperPath(working_path_),
+ pre_mapping_fbv_,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper_,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem_, GetDocJoinInfoMapperPath(working_path_),
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ qualified_id_storage_,
+ FileBackedVector<char>::Create(
+ filesystem_, GetQualifiedIdStoragePath(working_path_),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/pre_mapping_fbv_ ? 1024 * 1024 : 0));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Clear() {
+ SetDirty();
+
+ doc_join_info_mapper_.reset();
+ // Discard and reinitialize doc join info mapper.
+ std::string doc_join_info_mapper_path =
+ GetDocJoinInfoMapperPath(working_path_);
+ if (use_persistent_hash_map_) {
+ ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<int32_t>::Delete(
+ filesystem_, doc_join_info_mapper_path));
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper_,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, std::move(doc_join_info_mapper_path), pre_mapping_fbv_,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<int32_t>::Delete(
+ filesystem_, doc_join_info_mapper_path));
+ ICING_ASSIGN_OR_RETURN(doc_join_info_mapper_,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem_, doc_join_info_mapper_path,
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ // Clear qualified_id_storage_.
+ if (qualified_id_storage_->num_elements() > 0) {
+ ICING_RETURN_IF_ERROR(qualified_id_storage_->TruncateTo(0));
+ }
+
+ // TODO(b/268521214): clear delete propagation storage
+
+ info().last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ bool pre_mapping_fbv,
+ bool use_persistent_hash_map) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize doc_join_info_mapper
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper;
+ if (use_persistent_hash_map) {
+ // TODO(b/263890397): decide PersistentHashMapKeyMapper size
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path), pre_mapping_fbv,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path),
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ // Initialize qualified_id_storage
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetQualifiedIdStoragePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
+
+ // Create instance.
+ auto new_index = std::unique_ptr<QualifiedIdJoinIndexImplV1>(
+ new QualifiedIdJoinIndexImplV1(
+ filesystem, std::move(working_path),
+ /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
+ std::move(doc_join_info_mapper), std::move(qualified_id_storage),
+ pre_mapping_fbv, use_persistent_hash_map));
+ // Initialize info content.
+ new_index->info().magic = Info::kMagic;
+ new_index->info().last_added_document_id = kInvalidDocumentId;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_index->InitializeNewStorage());
+
+ return new_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map) {
+ // PRead metadata file.
+ auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
+ if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
+ metadata_buffer.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+    return absl_ports::InternalError("Failed to read metadata file");
+ }
+
+ // Initialize doc_join_info_mapper
+ bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists(
+ absl_ports::StrCat(GetDocJoinInfoMapperPath(working_path),
+ "/key_mapper_dir")
+ .c_str());
+ if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) ||
+ (!use_persistent_hash_map && !dynamic_trie_key_mapper_dir_exists)) {
+ // Return a failure here so that the caller can properly delete and rebuild
+ // this component.
+ return absl_ports::FailedPreconditionError("Key mapper type mismatch");
+ }
+
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper;
+ if (use_persistent_hash_map) {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path), pre_mapping_fbv,
+ /*max_num_entries=*/kDocJoinInfoMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kDocJoinInfoMapperAverageKVByteSize));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ doc_join_info_mapper,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem, GetDocJoinInfoMapperPath(working_path),
+ kDocJoinInfoMapperDynamicTrieMaxSize));
+ }
+
+ // Initialize qualified_id_storage
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetQualifiedIdStoragePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize,
+ /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
+
+ // Create instance.
+ auto type_joinable_index = std::unique_ptr<QualifiedIdJoinIndexImplV1>(
+ new QualifiedIdJoinIndexImplV1(
+ filesystem, std::move(working_path), std::move(metadata_buffer),
+ std::move(doc_join_info_mapper), std::move(qualified_id_storage),
+ pre_mapping_fbv, use_persistent_hash_map));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(type_joinable_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (type_joinable_index->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return type_joinable_index;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ QualifiedIdJoinIndexImplV1* new_index) const {
+ std::unique_ptr<KeyMapper<int32_t>::Iterator> iter =
+ doc_join_info_mapper_->GetIterator();
+ while (iter->Advance()) {
+ DocJoinInfo old_doc_join_info(
+ encode_util::DecodeIntFromCString(iter->GetKey()));
+ int32_t qualified_id_index = iter->GetValue();
+
+ const char* data = qualified_id_storage_->array() + qualified_id_index;
+ std::string_view ref_qualified_id_str(data, strlen(data));
+
+ // Translate to new doc id.
+ DocumentId new_document_id = GetNewDocumentId(
+ document_id_old_to_new, old_doc_join_info.document_id());
+
+ if (new_document_id != kInvalidDocumentId) {
+ ICING_RETURN_IF_ERROR(
+ new_index->Put(DocJoinInfo(new_document_id,
+ old_doc_join_info.joinable_property_id()),
+ ref_qualified_id_str));
+ }
+ }
+
+ // TODO(b/268521214): transfer delete propagation storage
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::PersistMetadataToDisk(
+ bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+
+ ScopedFd sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ if (!sfd.is_valid()) {
+    return absl_ports::InternalError("Failed to open metadata file for write");
+ }
+
+ if (!filesystem_.PWrite(sfd.get(), /*offset=*/0, metadata_buffer_.get(),
+ kMetadataFileSize)) {
+    return absl_ports::InternalError("Failed to write metadata file");
+ }
+
+ if (!filesystem_.DataSync(sfd.get())) {
+    return absl_ports::InternalError("Failed to sync metadata to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(doc_join_info_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(qualified_id_storage_->PersistToDisk());
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV1::ComputeInfoChecksum(bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV1::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 doc_join_info_mapper_crc,
+ doc_join_info_mapper_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 qualified_id_storage_crc,
+ qualified_id_storage_->ComputeChecksum());
+
+ return Crc32(doc_join_info_mapper_crc.Get() ^ qualified_id_storage_crc.Get());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index-impl-v1.h b/icing/join/qualified-id-join-index-impl-v1.h
new file mode 100644
index 0000000..9314602
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v1.h
@@ -0,0 +1,327 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndexImplV1: a class to maintain data mapping DocJoinInfo to
+// joinable qualified ids and delete propagation info.
+class QualifiedIdJoinIndexImplV1 : public QualifiedIdJoinIndex {
+ public:
+ struct Info {
+ static constexpr int32_t kMagic = 0x48cabdc6;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 8, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+ static constexpr int32_t kInfoMetadataBufferOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
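+  // (Together with the static_assert on sizeof(Info) above, this implies
+  // sizeof(Crcs) == 12.)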
+
+ // Creates a QualifiedIdJoinIndexImplV1 instance to store qualified ids for
+  // future joining search. If any of the underlying files is missing, then
+  // deletes the whole working_path and (re)initializes with new ones.
+  // Otherwise initializes and creates the instance from the existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+  //                QualifiedIdJoinIndexImplV1 uses the working path as its
+  //                working directory, and all related files will be stored
+  //                under this directory. It takes full ownership of
+  //                working_path_, including creation/deletion. It is the
+  //                caller's responsibility to specify a correct working path
+  //                and avoid mixing different persistent storages together
+  //                under the same path. Also the caller owns the parent
+  //                directory of working_path_ and is responsible for its
+  //                creation/deletion. See PersistentStorage for more details
+  //                about the concept of working_path.
+  // pre_mapping_fbv: flag indicating whether to memory-map the maximum
+  //                  possible file size for the underlying FileBackedVector
+  //                  before growing the actual file size.
+  // use_persistent_hash_map: flag indicating whether to use a persistent hash
+  //                          map as the key mapper (if false, then fall back
+  //                          to a dynamic trie key mapper).
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum
+ // - INTERNAL_ERROR on I/O errors
+ // - Any KeyMapper errors
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+ // Delete copy and move constructor/assignment operator.
+ QualifiedIdJoinIndexImplV1(const QualifiedIdJoinIndexImplV1&) = delete;
+ QualifiedIdJoinIndexImplV1& operator=(const QualifiedIdJoinIndexImplV1&) =
+ delete;
+
+ QualifiedIdJoinIndexImplV1(QualifiedIdJoinIndexImplV1&&) = delete;
+ QualifiedIdJoinIndexImplV1& operator=(QualifiedIdJoinIndexImplV1&&) = delete;
+
+ ~QualifiedIdJoinIndexImplV1() override;
+
+ // v2 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) override {
+    return absl_ports::UnimplementedError("This API is not supported in V1");
+ }
+
+ // v2 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+ GetIterator(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const override {
+    return absl_ports::UnimplementedError("This API is not supported in V1");
+ }
+
+  // Puts new data into the index: DocJoinInfo (DocumentId, JoinablePropertyId)
+  // references ref_qualified_id_str (the identifier of another document).
+ //
+ // REQUIRES: ref_qualified_id_str contains no '\0'.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - Any KeyMapper errors
+ libtextclassifier3::Status Put(
+ const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str) override;
+
+ // Gets the referenced document's qualified id string by DocJoinInfo.
+ //
+ // Returns:
+ // - A qualified id string referenced by the given DocJoinInfo (DocumentId,
+ // JoinablePropertyId) on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - NOT_FOUND_ERROR if doc_join_info doesn't exist
+ // - Any KeyMapper errors
+ libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const override;
+
+  // Reduces internal file sizes by reclaiming the space and ids of deleted
+  // documents. The qualified id join index will convert all entries to the
+  // new document ids.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - namespace_id_old_to_new: a map for converting old namespace id to new
+ // namespace id. It is unused in this implementation since we store raw
+ // qualified id string (which contains raw namespace string).
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the qualified id type joinable
+ // index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+ // an invalid state and the caller should handle it properly (e.g. discard
+ // and rebuild)
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+  // Clears all data and sets last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear() override;
+
+ bool is_v2() const override { return false; }
+
+ int32_t size() const override { return doc_join_info_mapper_->num_keys(); }
+
+ bool empty() const override { return size() == 0; }
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
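+    // Only advance last_added_document_id: ignore any document id that is not
+    // greater than the current value (unless the current value is invalid).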
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ private:
+ explicit QualifiedIdJoinIndexImplV1(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<uint8_t[]> metadata_buffer,
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ bool pre_mapping_fbv, bool use_persistent_hash_map)
+ : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
+ metadata_buffer_(std::move(metadata_buffer)),
+ doc_join_info_mapper_(std::move(doc_join_info_mapper)),
+ qualified_id_storage_(std::move(qualified_id_storage)),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ use_persistent_hash_map_(use_persistent_hash_map),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path, bool pre_mapping_fbv,
+ bool use_persistent_hash_map);
+
+  // Transfers qualified id join index data from the current index to new_index
+  // and converts entries to the new document ids according to
+  // document_id_old_to_new. It is a helper function for Optimize.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ QualifiedIdJoinIndexImplV1* new_index) const;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ // Metadata buffer
+ std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+ // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
+ // JoinablePropertyId) to another referenced document's qualified id string
+ // index in qualified_id_storage_.
+ std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+
+ // Storage for qualified id strings.
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
+
+ // TODO(b/268521214): add delete propagation storage
+
+  // Flag indicating whether to memory-map the maximum possible file size for
+  // the underlying FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+  // Flag indicating whether to use a persistent hash map as the key mapper
+  // (if false, then fall back to a dynamic trie key mapper).
+ bool use_persistent_hash_map_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
diff --git a/icing/join/qualified-id-join-index-impl-v1_test.cc b/icing/join/qualified-id-join-index-impl-v1_test.cc
new file mode 100644
index 0000000..a6e19bb
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v1_test.cc
@@ -0,0 +1,931 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/store/document-id.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = QualifiedIdJoinIndexImplV1::Info;
+
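+// Non-zero offset added to stored values/checksums to simulate corruption in
+// tests.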
+static constexpr int32_t kCorruptedValueOffset = 3;
+
+struct QualifiedIdJoinIndexImplV1TestParam {
+ bool pre_mapping_fbv;
+ bool use_persistent_hash_map;
+
+ explicit QualifiedIdJoinIndexImplV1TestParam(bool pre_mapping_fbv_in,
+ bool use_persistent_hash_map_in)
+ : pre_mapping_fbv(pre_mapping_fbv_in),
+ use_persistent_hash_map(use_persistent_hash_map_in) {}
+};
+
+class QualifiedIdJoinIndexImplV1Test
+ : public ::testing::TestWithParam<QualifiedIdJoinIndexImplV1TestParam> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/qualified_id_join_index_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidWorkingPath) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(
+ filesystem_, "/dev/null/qualified_id_join_index_test",
+ param.pre_mapping_fbv, param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, InitializeNewFiles) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Check info section
+ const Info* info = reinterpret_cast<const Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
+ EXPECT_THAT(info->magic, Eq(Info::kMagic));
+ EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
+
+ // Check crcs section
+ const Crcs* crcs = reinterpret_cast<const Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
+  // There is some initial data in the KeyMapper, so storages_crc should be
+  // non-zero.
+ EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs->component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs->all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs->component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(param.use_persistent_hash_map
+ ? libtextclassifier3::StatusCode::FAILED_PRECONDITION
+ : libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializationShouldSucceedWithPersistToDisk) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index1,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ index1->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index1->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index1->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ASSERT_THAT(index1, Pointee(SizeIs(3)));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(index1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index2,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index2, Pointee(SizeIs(3)));
+ EXPECT_THAT(
+ index2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriA"));
+ EXPECT_THAT(
+ index2->Get(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20)),
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriB"));
+ EXPECT_THAT(
+ index2->Get(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20)),
+ IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriC"));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializationShouldSucceedAfterDestruction) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Insert some data.
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ASSERT_THAT(index, Pointee(SizeIs(3)));
+ }
+
+ {
+ // The previous instance went out of scope and was destructed. Although we
+ // didn't call PersistToDisk explicitly, the destructor should invoke it and
+ // thus initializing another instance on the same files should succeed, and
+ // we should be able to get the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/1,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds("namespace#uriA"));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/3,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds("namespace#uriB"));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/5,
+ /*joinable_property_id=*/20)),
+ IsOkAndHolds("namespace#uriC"));
+ }
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ // Manually change magic and update checksum
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+    // Change the magic and recompute the checksums, so that validation fails
+    // on the magic value rather than on the crcs.
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
+ info->magic += kCorruptedValueOffset;
+ crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+ crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with different magic. This
+ // should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Incorrect magic value")));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually corrupt all_crc
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
+ crcs->all_crc += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with metadata containing
+ // corrupted all_crc. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid all crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+    // Modify info, but don't update the checksum. This simulates corruption
+    // of the info section.
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
+ info->last_added_document_id += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV1::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with info that doesn't match
+ // its checksum. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid info crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializeExistingFilesWithCorruptedDocJoinInfoMapperShouldFail) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Corrupt doc_join_info_mapper manually.
+ {
+ std::string mapper_working_path =
+ absl_ports::StrCat(working_path_, "/doc_join_info_mapper");
+ std::unique_ptr<KeyMapper<int32_t>> mapper;
+ if (param.use_persistent_hash_map) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ mapper, PersistentHashMapKeyMapper<int32_t>::Create(
+ filesystem_, std::move(mapper_working_path),
+ param.pre_mapping_fbv));
+ } else {
+ ICING_ASSERT_OK_AND_ASSIGN(mapper,
+ DynamicTrieKeyMapper<int32_t>::Create(
+ filesystem_, mapper_working_path,
+ /*maximum_size_bytes=*/128 * 1024 * 1024));
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
+ ICING_ASSERT_OK(mapper->Put("foo", 12345));
+ ICING_ASSERT_OK(mapper->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, mapper->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ // Attempt to create the qualified id join index with corrupted
+ // doc_join_info_mapper. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+ InitializeExistingFilesWithCorruptedQualifiedIdStorageShouldFail) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ // Corrupt qualified_id_storage manually.
+ std::string qualified_id_storage_path =
+ absl_ports::StrCat(working_path_, "/qualified_id_storage");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+ FileBackedVector<char>::Create(
+ filesystem_, qualified_id_storage_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ qualified_id_storage->ComputeChecksum());
+ ICING_ASSERT_OK(qualified_id_storage->Append('a'));
+ ICING_ASSERT_OK(qualified_id_storage->Append('b'));
+ ICING_ASSERT_OK(qualified_id_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ qualified_id_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+ // Attempt to create the qualified id join index with corrupted
+ // qualified_id_storage. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidPut) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(
+ index->Put(default_invalid, /*ref_qualified_id_str=*/"namespace#uriA"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidGet) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ DocJoinInfo default_invalid;
+ EXPECT_THAT(index->Get(default_invalid),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, PutAndGet) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
+ std::string_view ref_qualified_id_str_a = "namespace#uriA";
+
+ DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/13);
+ std::string_view ref_qualified_id_str_b = "namespace#uriB";
+
+ DocJoinInfo target_info3(/*document_id=*/4, /*joinable_property_id=*/4);
+ std::string_view ref_qualified_id_str_c = "namespace#uriC";
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ EXPECT_THAT(index->Put(target_info1, ref_qualified_id_str_a), IsOk());
+ EXPECT_THAT(index->Put(target_info2, ref_qualified_id_str_b), IsOk());
+ EXPECT_THAT(index->Put(target_info3, ref_qualified_id_str_c), IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+
+ EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
+ EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
+ EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Verify we can get all of them after destructing and re-initializing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
+ EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
+ EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, GetShouldReturnNotFoundErrorIfNotExist) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20);
+ std::string_view ref_qualified_id_str = "namespace#uriA";
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ // Verify entry is not found in the beginning.
+ EXPECT_THAT(index->Get(target_info),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK(index->Put(target_info, ref_qualified_id_str));
+ ASSERT_THAT(index->Get(target_info), IsOkAndHolds(ref_qualified_id_str));
+
+ // Get another non-existing entry. This should get NOT_FOUND_ERROR.
+ DocJoinInfo another_target_info(/*document_id=*/2,
+ /*joinable_property_id=*/20);
+ EXPECT_THAT(index->Get(another_target_info),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, SetLastAddedDocumentId) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ constexpr DocumentId kDocumentId = 100;
+ index->set_last_added_document_id(kDocumentId);
+ EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 123;
+ index->set_last_added_document_id(kNextDocumentId);
+ EXPECT_THAT(index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TEST_P(
+ QualifiedIdJoinIndexImplV1Test,
+ SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ constexpr DocumentId kDocumentId = 123;
+ index->set_last_added_document_id(kDocumentId);
+ ASSERT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 100;
+ ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+ index->set_last_added_document_id(kNextDocumentId);
+ // last_added_document_id() should remain unchanged.
+ EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, Optimize) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/3),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/8, /*joinable_property_id=*/9),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/13, /*joinable_property_id=*/4),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/21, /*joinable_property_id=*/12),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(5)));
+
+ // Delete doc id = 5, 8, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[13] = 1;
+ document_id_old_to_new[21] = 2;
+
+ DocumentId new_last_added_document_id = 2;
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify Put and Get API still work normally after Optimize().
+ // (old_doc_id=3, joinable_property_id=10), which is now (doc_id=0,
+ // joinable_property_id=10), has referenced qualified id str =
+ // "namespace#uriA".
+ EXPECT_THAT(
+ index->Get(DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/10)),
+ IsOkAndHolds("namespace#uriA"));
+
+ // (old_doc_id=5, joinable_property_id=3) and (old_doc_id=8,
+ // joinable_property_id=9) are now not found since we've deleted old_doc_id =
+ // 5, 8. It is not testable via Get() because there is no valid doc_id mapping
+  // for old_doc_id = 5, 8 and we cannot generate a valid DocJoinInfo for them.
+
+ // (old_doc_id=13, joinable_property_id=4), which is now (doc_id=1,
+ // joinable_property_id=4), has referenced qualified id str =
+ // "namespace#uriC".
+ EXPECT_THAT(
+ index->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/4)),
+ IsOkAndHolds("namespace#uriC"));
+
+ // (old_doc_id=21, joinable_property_id=12), which is now (doc_id=2,
+ // joinable_property_id=12), has referenced qualified id str =
+ // "namespace#uriC".
+ EXPECT_THAT(
+ index->Get(DocJoinInfo(/*document_id=*/2, /*joinable_property_id=*/12)),
+ IsOkAndHolds("namespace#uriC"));
+
+ // Joinable index should be able to work normally after Optimize().
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/2),
+ /*ref_qualified_id_str=*/"namespace#uriD"));
+ index->set_last_added_document_id(99);
+
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(99));
+ EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/99,
+ /*joinable_property_id=*/2)),
+ IsOkAndHolds("namespace#uriD"));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, OptimizeOutOfRangeDocumentId) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/10),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ index->set_last_added_document_id(99);
+
+ // Create document_id_old_to_new with size = 1. Optimize should handle out of
+ // range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId};
+
+ // There shouldn't be any error due to vector index.
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, OptimizeDeleteAll) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/5, /*joinable_property_id=*/3),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/8, /*joinable_property_id=*/9),
+ /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/13, /*joinable_property_id=*/4),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/21, /*joinable_property_id=*/12),
+ /*ref_qualified_id_str=*/"namespace#uriC"));
+ index->set_last_added_document_id(21);
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, Clear) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
+ DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/5);
+ DocJoinInfo target_info3(/*document_id=*/6, /*joinable_property_id=*/13);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(target_info1, /*ref_qualified_id_str=*/"namespace#uriA"));
+ ICING_ASSERT_OK(
+ index->Put(target_info2, /*ref_qualified_id_str=*/"namespace#uriB"));
+ ICING_ASSERT_OK(
+ index->Put(target_info3, /*ref_qualified_id_str=*/"namespace#uriC"));
+ ASSERT_THAT(index, Pointee(SizeIs(3)));
+ index->set_last_added_document_id(6);
+ ASSERT_THAT(index->last_added_document_id(), Eq(6));
+
+ // After resetting, last_added_document_id should be set to
+  // kInvalidDocumentId, and the previously added data should be deleted.
+ EXPECT_THAT(index->Clear(), IsOk());
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(index->Get(target_info1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info2),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info3),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Join index should be able to work normally after Clear().
+ DocJoinInfo target_info4(/*document_id=*/2, /*joinable_property_id=*/19);
+ ICING_ASSERT_OK(
+ index->Put(target_info4, /*ref_qualified_id_str=*/"namespace#uriD"));
+ index->set_last_added_document_id(2);
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ index.reset();
+
+ // Verify index after reconstructing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ EXPECT_THAT(index->last_added_document_id(), Eq(2));
+ EXPECT_THAT(index->Get(target_info1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info2),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info3),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
+}
+
+TEST_P(QualifiedIdJoinIndexImplV1Test, SwitchKeyMapperTypeShouldReturnError) {
+ const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+ QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ param.use_persistent_hash_map));
+ ICING_ASSERT_OK(
+ index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
+ /*ref_qualified_id_str=*/"namespace#uriA"));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ bool switch_key_mapper_flag = !param.use_persistent_hash_map;
+ EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+ param.pre_mapping_fbv,
+ switch_key_mapper_flag),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ QualifiedIdJoinIndexImplV1Test, QualifiedIdJoinIndexImplV1Test,
+ testing::Values(QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true),
+ QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ QualifiedIdJoinIndexImplV1TestParam(
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false)));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index-impl-v2.cc b/icing/join/qualified-id-join-index-impl-v2.cc
new file mode 100644
index 0000000..70fd13c
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2.cc
@@ -0,0 +1,681 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-accessor.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Use 1M for the max number of qualified id entries and 10 bytes for the
+// average key-value pair size. This will take at most 23 MiB of disk space
+// and mmap region for the persistent hash map.
+static constexpr int32_t kSchemaJoinableIdToPostingListMapperMaxNumEntries =
+ 1 << 20;
+static constexpr int32_t kSchemaJoinableIdToPostingListMapperAverageKVByteSize =
+ 10;
+
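+// Returns the new document id mapped from old_document_id, or
+// kInvalidDocumentId if old_document_id is out of range of the mapping
+// (treated as deleted).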
+inline DocumentId GetNewDocumentId(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId old_document_id) {
+ if (old_document_id >= document_id_old_to_new.size()) {
+ return kInvalidDocumentId;
+ }
+ return document_id_old_to_new[old_document_id];
+}
+
+inline NamespaceId GetNewNamespaceId(
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ NamespaceId namespace_id) {
+ if (namespace_id >= namespace_id_old_to_new.size()) {
+ return kInvalidNamespaceId;
+ }
+ return namespace_id_old_to_new[namespace_id];
+}
+
+libtextclassifier3::StatusOr<PostingListIdentifier> GetPostingListIdentifier(
+ const KeyMapper<PostingListIdentifier>&
+ schema_joinable_id_to_posting_list_mapper,
+ const std::string& encoded_schema_type_joinable_property_id_str) {
+ auto posting_list_identifier_or =
+ schema_joinable_id_to_posting_list_mapper.Get(
+ encoded_schema_type_joinable_property_id_str);
+ if (!posting_list_identifier_or.ok()) {
+ if (absl_ports::IsNotFound(posting_list_identifier_or.status())) {
+ // Not found. Return invalid posting list id.
+ return PostingListIdentifier::kInvalid;
+ }
+ // Real error.
+ return posting_list_identifier_or;
+ }
+ return std::move(posting_list_identifier_or).ValueOrDie();
+}
+
+libtextclassifier3::StatusOr<std::string> EncodeSchemaTypeJoinablePropertyId(
+ SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id) {
+ if (schema_type_id < 0) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ if (!IsJoinablePropertyIdValid(joinable_property_id)) {
+ return absl_ports::InvalidArgumentError("Invalid joinable property id");
+ }
+
+ static constexpr int kEncodedSchemaTypeIdLength = 3;
+
+ // encoded_schema_type_id_str should be 1 to 3 bytes based on the value of
+ // schema_type_id.
+ std::string encoded_schema_type_id_str =
+ encode_util::EncodeIntToCString(schema_type_id);
+  // Pad encoded_schema_type_id_str to fixed kEncodedSchemaTypeIdLength bytes.
+ while (encoded_schema_type_id_str.size() < kEncodedSchemaTypeIdLength) {
+    // A C string cannot contain 0 bytes, so we pad using 1, just like what we
+    // do in encode_util::EncodeIntToCString.
+    //
+    // This works because DecodeIntToString decodes a byte value of 0x01 as
+    // 0x00. When EncodeIntToCString returns an encoded schema type id that is
+    // shorter than 3 bytes, the id contains unencoded leading 0x00 bytes, so
+    // here we explicitly encode those bytes as 0x01.
+ encoded_schema_type_id_str.push_back(1);
+ }
+
+ return absl_ports::StrCat(
+ encoded_schema_type_id_str,
+ encode_util::EncodeIntToCString(joinable_property_id));
+}
+
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/metadata");
+}
+
+std::string GetSchemaJoinableIdToPostingListMapperPath(
+ std::string_view working_path) {
+ return absl_ports::StrCat(working_path,
+ "/schema_joinable_id_to_posting_list_mapper");
+}
+
+std::string GetFlashIndexStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/flash_index_storage");
+}
+
+} // namespace
+
+libtextclassifier3::Status
+QualifiedIdJoinIndexImplV2::JoinDataIterator::Advance() {
+ if (pl_accessor_ == nullptr) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ if (!should_retrieve_next_batch_) {
+ // In this case, cached_batch_join_data_ is not empty (contains some data
+ // fetched in the previous round), so move curr_ to the next position and
+ // check if we have to fetch the next batch.
+ //
+ // Note: in the 1st round, should_retrieve_next_batch_ is true, so this part
+ // will never be executed.
+ ++curr_;
+ should_retrieve_next_batch_ = curr_ >= cached_batch_join_data_.cend();
+ }
+
+ if (should_retrieve_next_batch_) {
+ // Fetch next batch if needed.
+ ICING_RETURN_IF_ERROR(GetNextDataBatch());
+ should_retrieve_next_batch_ = false;
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status
+QualifiedIdJoinIndexImplV2::JoinDataIterator::GetNextDataBatch() {
+ auto cached_batch_join_data_or = pl_accessor_->GetNextDataBatch();
+ if (!cached_batch_join_data_or.ok()) {
+ ICING_LOG(WARNING)
+ << "Fail to get next batch data from posting list due to: "
+ << cached_batch_join_data_or.status().error_message();
+ return std::move(cached_batch_join_data_or).status();
+ }
+
+ cached_batch_join_data_ = std::move(cached_batch_join_data_or).ValueOrDie();
+ curr_ = cached_batch_join_data_.cbegin();
+
+ if (cached_batch_join_data_.empty()) {
+ return absl_ports::ResourceExhaustedError("End of iterator");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::Create(const Filesystem& filesystem,
+ std::string working_path,
+ bool pre_mapping_fbv) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.DirectoryExists(
+ GetSchemaJoinableIdToPostingListMapperPath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetFlashIndexStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any file/directory is missing, and reinitialize.
+ if (filesystem.DirectoryExists(working_path.c_str())) {
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem, working_path));
+ }
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv);
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ pre_mapping_fbv);
+}
+
+QualifiedIdJoinIndexImplV2::~QualifiedIdJoinIndexImplV2() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING) << "Failed to persist qualified id join index (v2) to "
+ "disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Put(
+ SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) {
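+  // Sort the references first so that std::unique below can remove all
+  // duplicates, and the join data is prepended to the posting list in a
+  // deterministic order.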
+ std::sort(ref_namespace_fingerprint_ids.begin(),
+ ref_namespace_fingerprint_ids.end());
+
+ // Dedupe.
+ auto last = std::unique(ref_namespace_fingerprint_ids.begin(),
+ ref_namespace_fingerprint_ids.end());
+ ref_namespace_fingerprint_ids.erase(last,
+ ref_namespace_fingerprint_ids.end());
+ if (ref_namespace_fingerprint_ids.empty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ SetDirty();
+ ICING_ASSIGN_OR_RETURN(
+ std::string encoded_schema_type_joinable_property_id_str,
+ EncodeSchemaTypeJoinablePropertyId(schema_type_id, joinable_property_id));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier posting_list_identifier,
+ GetPostingListIdentifier(*schema_joinable_id_to_posting_list_mapper_,
+ encoded_schema_type_joinable_property_id_str));
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor;
+ if (posting_list_identifier.is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_.get(),
+ posting_list_identifier));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ flash_index_storage_.get(), posting_list_serializer_.get()));
+ }
+
+ // Prepend join data into posting list.
+ for (const NamespaceFingerprintIdentifier& ref_namespace_fingerprint_id :
+ ref_namespace_fingerprint_ids) {
+ ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ document_id, ref_namespace_fingerprint_id)));
+ }
+
+ // Finalize the posting list and update mapper.
+ PostingListAccessor::FinalizeResult result =
+ std::move(*pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError("Fail to flush data into posting list(s)");
+ }
+ ICING_RETURN_IF_ERROR(schema_joinable_id_to_posting_list_mapper_->Put(
+ encoded_schema_type_joinable_property_id_str, result.id));
+
+ // Update info.
+ info().num_data += ref_namespace_fingerprint_ids.size();
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase>>
+QualifiedIdJoinIndexImplV2::GetIterator(
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const {
+ ICING_ASSIGN_OR_RETURN(
+ std::string encoded_schema_type_joinable_property_id_str,
+ EncodeSchemaTypeJoinablePropertyId(schema_type_id, joinable_property_id));
+
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier posting_list_identifier,
+ GetPostingListIdentifier(*schema_joinable_id_to_posting_list_mapper_,
+ encoded_schema_type_joinable_property_id_str));
+
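+  // No posting list exists for this (schema_type_id, joinable_property_id)
+  // pair: return an iterator that is immediately exhausted.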
+ if (!posting_list_identifier.is_valid()) {
+ return std::make_unique<JoinDataIterator>(nullptr);
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_.get(),
+ posting_list_identifier));
+
+ return std::make_unique<JoinDataIterator>(std::move(pl_accessor));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) {
+ std::string temp_working_path = working_path_ + "_temp";
+ ICING_RETURN_IF_ERROR(
+ QualifiedIdJoinIndex::Discard(filesystem_, temp_working_path));
+
+ DestructibleDirectory temp_working_path_ddir(&filesystem_,
+ std::move(temp_working_path));
+ if (!temp_working_path_ddir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new qualified id join index "
+ "(v2)");
+ }
+
+ {
+    // Transfer all data from the current index to the new qualified id join
+    // index. Also call PersistToDisk and destroy the new instance after
+    // finishing, so we can safely swap directories later.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> new_index,
+ Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_));
+ ICING_RETURN_IF_ERROR(TransferIndex(
+ document_id_old_to_new, namespace_id_old_to_new, new_index.get()));
+ new_index->set_last_added_document_id(new_last_added_document_id);
+ ICING_RETURN_IF_ERROR(new_index->PersistToDisk());
+ }
+
+  // Destroy the current index's storage instances so that directories can be
+  // swapped safely.
+ // TODO(b/268521214): handle delete propagation storage
+ schema_joinable_id_to_posting_list_mapper_.reset();
+ flash_index_storage_.reset();
+
+ if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+ working_path_.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new qualified id join index (v2) due to failed swap");
+ }
+
+ // Reinitialize qualified id join index.
+ if (!filesystem_.PRead(GetMetadataFilePath(working_path_).c_str(),
+ metadata_buffer_.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ schema_joinable_id_to_posting_list_mapper_,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem_,
+ GetSchemaJoinableIdToPostingListMapperPath(working_path_),
+ pre_mapping_fbv_,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path_),
+ &filesystem_, posting_list_serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Clear() {
+ SetDirty();
+
+ schema_joinable_id_to_posting_list_mapper_.reset();
+ // Discard and reinitialize schema_joinable_id_to_posting_list_mapper.
+ std::string schema_joinable_id_to_posting_list_mapper_path =
+ GetSchemaJoinableIdToPostingListMapperPath(working_path_);
+ ICING_RETURN_IF_ERROR(
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Delete(
+ filesystem_, schema_joinable_id_to_posting_list_mapper_path));
+ ICING_ASSIGN_OR_RETURN(
+ schema_joinable_id_to_posting_list_mapper_,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem_,
+ std::move(schema_joinable_id_to_posting_list_mapper_path),
+ pre_mapping_fbv_,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+ // Discard and reinitialize flash_index_storage.
+ flash_index_storage_.reset();
+ if (!filesystem_.DeleteFile(
+ GetFlashIndexStorageFilePath(working_path_).c_str())) {
+ return absl_ports::InternalError("Fail to delete flash index storage file");
+ }
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path_),
+ &filesystem_, posting_list_serializer_.get()));
+ flash_index_storage_ =
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+
+ // TODO(b/268521214): clear delete propagation storage
+
+ info().num_data = 0;
+ info().last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
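+
+// Note: Clear() discards and recreates the underlying storages on disk rather
+// than truncating them in place; the metadata buffer is kept and its info
+// section is reset above.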
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path,
+ bool pre_mapping_fbv) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize schema_joinable_id_to_posting_list_mapper
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem, GetSchemaJoinableIdToPostingListMapperPath(working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+ // Initialize flash_index_storage
+ auto posting_list_serializer =
+ std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer.get()));
+
+ // Create instance.
+ auto new_join_index = std::unique_ptr<QualifiedIdJoinIndexImplV2>(
+ new QualifiedIdJoinIndexImplV2(
+ filesystem, std::move(working_path),
+ /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
+ std::move(schema_joinable_id_to_posting_list_mapper),
+ std::move(posting_list_serializer),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage)),
+ pre_mapping_fbv));
+ // Initialize info content.
+ new_join_index->info().magic = Info::kMagic;
+ new_join_index->info().num_data = 0;
+ new_join_index->info().last_added_document_id = kInvalidDocumentId;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_join_index->InitializeNewStorage());
+
+ return new_join_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv) {
+ // PRead metadata file.
+ auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
+ if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
+ metadata_buffer.get(), kMetadataFileSize,
+ /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read metadata file");
+ }
+
+ // Initialize schema_joinable_id_to_posting_list_mapper
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem, GetSchemaJoinableIdToPostingListMapperPath(working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/
+ kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+ /*average_kv_byte_size=*/
+ kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+ // Initialize flash_index_storage
+ auto posting_list_serializer =
+ std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+ ICING_ASSIGN_OR_RETURN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+ &filesystem, posting_list_serializer.get()));
+
+ // Create instance.
+ auto join_index = std::unique_ptr<QualifiedIdJoinIndexImplV2>(
+ new QualifiedIdJoinIndexImplV2(
+ filesystem, std::move(working_path), std::move(metadata_buffer),
+ std::move(schema_joinable_id_to_posting_list_mapper),
+ std::move(posting_list_serializer),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage)),
+ pre_mapping_fbv));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(join_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (join_index->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return join_index;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ QualifiedIdJoinIndexImplV2* new_index) const {
+ std::unique_ptr<KeyMapper<PostingListIdentifier>::Iterator> iter =
+ schema_joinable_id_to_posting_list_mapper_->GetIterator();
+
+  // Iterate through all (schema_type_id, joinable_property_id) keys.
+ while (iter->Advance()) {
+ PostingListIdentifier old_pl_id = iter->GetValue();
+ if (!old_pl_id.is_valid()) {
+ // Skip invalid posting list id.
+ continue;
+ }
+
+    // Read all join data from the old posting lists and convert them to new
+    // join data with new document ids and namespace ids.
+ std::vector<JoinDataType> new_join_data_vec;
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>
+ old_pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_.get(),
+ old_pl_id));
+ ICING_ASSIGN_OR_RETURN(std::vector<JoinDataType> batch_old_join_data,
+ old_pl_accessor->GetNextDataBatch());
+ while (!batch_old_join_data.empty()) {
+ for (const JoinDataType& old_join_data : batch_old_join_data) {
+ DocumentId new_document_id = GetNewDocumentId(
+ document_id_old_to_new, old_join_data.document_id());
+ NamespaceId new_ref_namespace_id = GetNewNamespaceId(
+ namespace_id_old_to_new, old_join_data.join_info().namespace_id());
+
+ // Transfer if the document and namespace are not deleted or outdated.
+ if (new_document_id != kInvalidDocumentId &&
+ new_ref_namespace_id != kInvalidNamespaceId) {
+          // We can reuse the fingerprint from old_join_data, since the
+          // document URI (and therefore its fingerprint) never changes.
+ new_join_data_vec.push_back(JoinDataType(
+ new_document_id, NamespaceFingerprintIdentifier(
+ new_ref_namespace_id,
+ old_join_data.join_info().fingerprint())));
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(batch_old_join_data,
+ old_pl_accessor->GetNextDataBatch());
+ }
+
+ if (new_join_data_vec.empty()) {
+ continue;
+ }
+
+ // NamespaceId order may change, so we have to sort the vector.
+ std::sort(new_join_data_vec.begin(), new_join_data_vec.end());
+
+ // Create new posting list in new_index and prepend all new join data into
+ // it.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>
+ new_pl_accessor,
+ PostingListJoinDataAccessor<JoinDataType>::Create(
+ new_index->flash_index_storage_.get(),
+ new_index->posting_list_serializer_.get()));
+ for (const JoinDataType& new_join_data : new_join_data_vec) {
+ ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(new_join_data));
+ }
+
+ // Finalize the posting list and update mapper of new_index.
+ PostingListAccessor::FinalizeResult result =
+ std::move(*new_pl_accessor).Finalize();
+ if (!result.status.ok()) {
+ return result.status;
+ }
+ if (!result.id.is_valid()) {
+ return absl_ports::InternalError(
+ "Fail to flush data into posting list(s)");
+ }
+ ICING_RETURN_IF_ERROR(
+ new_index->schema_joinable_id_to_posting_list_mapper_->Put(
+ iter->GetKey(), result.id));
+
+ // Update info.
+ new_index->info().num_data += new_join_data_vec.size();
+ }
+
+ // TODO(b/268521214): transfer delete propagation storage
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::PersistMetadataToDisk(
+ bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string metadata_file_path = GetMetadataFilePath(working_path_);
+
+ ScopedFd sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open metadata file for write");
+ }
+
+ if (!filesystem_.PWrite(sfd.get(), /*offset=*/0, metadata_buffer_.get(),
+ kMetadataFileSize)) {
+ return absl_ports::InternalError("Fail to write metadata file");
+ }
+
+ if (!filesystem_.DataSync(sfd.get())) {
+ return absl_ports::InternalError("Fail to sync metadata to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(
+ schema_joinable_id_to_posting_list_mapper_->PersistToDisk());
+ if (!flash_index_storage_->PersistToDisk()) {
+ return absl_ports::InternalError(
+ "Fail to persist FlashIndexStorage to disk");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV2::ComputeInfoChecksum(bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV2::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ Crc32 schema_joinable_id_to_posting_list_mapper_crc,
+ schema_joinable_id_to_posting_list_mapper_->ComputeChecksum());
+
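+  // Note: only the key mapper contributes to the storages checksum;
+  // FlashIndexStorage contents are not included in this crc.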
+ return Crc32(schema_joinable_id_to_posting_list_mapper_crc.Get());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index-impl-v2.h b/icing/join/qualified-id-join-index-impl-v2.h
new file mode 100644
index 0000000..2b0bf3f
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2.h
@@ -0,0 +1,369 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-accessor.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndexImplV2: a class to maintain join data (DocumentId to
+// referenced NamespaceFingerprintIdentifier). It stores join data in posting
+// lists and bucketizes them by (schema_type_id, joinable_property_id).
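+//
+// Example usage (illustrative sketch; error handling elided):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+//       QualifiedIdJoinIndexImplV2::Create(
+//           filesystem, std::move(working_path), /*pre_mapping_fbv=*/false));
+//   ICING_RETURN_IF_ERROR(index->Put(schema_type_id, joinable_property_id,
+//                                    document_id, std::move(ref_ids)));
+//   ICING_RETURN_IF_ERROR(index->PersistToDisk());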
+class QualifiedIdJoinIndexImplV2 : public QualifiedIdJoinIndex {
+ public:
+ using JoinDataType = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+ class JoinDataIterator : public JoinDataIteratorBase {
+ public:
+ explicit JoinDataIterator(
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor)
+ : pl_accessor_(std::move(pl_accessor)),
+ should_retrieve_next_batch_(true) {}
+
+ ~JoinDataIterator() override = default;
+
+ // Advances to the next data.
+ //
+ // Returns:
+ // - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+ // data)
+ // - Any other PostingListJoinDataAccessor errors
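+    //
+    // Typical loop (illustrative):
+    //   while (iter->Advance().ok()) { Process(iter->GetCurrent()); }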
+ libtextclassifier3::Status Advance() override;
+
+ const JoinDataType& GetCurrent() const override { return *curr_; }
+
+ private:
+    // Gets the next batch of data from the posting list chain, caches it in
+    // cached_batch_join_data_, and sets curr_ to the beginning of the cache.
+ libtextclassifier3::Status GetNextDataBatch();
+
+ std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor_;
+ std::vector<JoinDataType> cached_batch_join_data_;
+ std::vector<JoinDataType>::const_iterator curr_;
+ bool should_retrieve_next_batch_;
+ };
+
+ struct Info {
+ static constexpr int32_t kMagic = 0x12d1c074;
+
+ int32_t magic;
+ int32_t num_data;
+ DocumentId last_added_document_id;
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ static_assert(sizeof(Info) == 12, "");
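+  // (Packed layout: 4-byte magic + 4-byte num_data + 4-byte
+  // last_added_document_id = 12 bytes.)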
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+ static constexpr int32_t kInfoMetadataBufferOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 24, "");
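+  // (sizeof(Crcs) is therefore 12 bytes, giving the 24-byte metadata file.)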
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+
+ // Creates a QualifiedIdJoinIndexImplV2 instance to store join data
+  // (DocumentId to referenced NamespaceFingerprintIdentifier) for future join
+  // searches. If any of the underlying files is missing, the whole
+  // working_path is deleted and (re)initialized with new files. Otherwise the
+  // instance is initialized from the existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+  //               QualifiedIdJoinIndexImplV2 uses the working path as its
+  //               working directory, and all related files are stored under
+  //               it. The instance takes full ownership of working_path_,
+  //               including creation/deletion. It is the caller's
+  //               responsibility to specify a correct working path and to
+  //               avoid mixing different persistent storages under the same
+  //               path. The caller also owns the parent directory of
+  //               working_path_ and is responsible for its creation and
+  //               deletion. See PersistentStorage for more details about the
+  //               concept of working_path.
+  // pre_mapping_fbv: flag indicating whether to memory-map the maximum
+  //                  possible file size for the underlying FileBackedVector
+  //                  before growing the actual file size.
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum
+ // - INTERNAL_ERROR on I/O errors
+ // - Any KeyMapper errors
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv);
+
+ // Delete copy and move constructor/assignment operator.
+ QualifiedIdJoinIndexImplV2(const QualifiedIdJoinIndexImplV2&) = delete;
+ QualifiedIdJoinIndexImplV2& operator=(const QualifiedIdJoinIndexImplV2&) =
+ delete;
+
+ QualifiedIdJoinIndexImplV2(QualifiedIdJoinIndexImplV2&&) = delete;
+ QualifiedIdJoinIndexImplV2& operator=(QualifiedIdJoinIndexImplV2&&) = delete;
+
+ ~QualifiedIdJoinIndexImplV2() override;
+
+ // v1 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::Status Put(
+ const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str) override {
+ return absl_ports::UnimplementedError("This API is not supported in V2");
+ }
+
+ // v1 only API. Returns UNIMPLEMENTED_ERROR.
+ libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const override {
+ return absl_ports::UnimplementedError("This API is not supported in V2");
+ }
+
+ // Puts a list of referenced (parent) NamespaceFingerprintIdentifiers into
+ // the join index, given the (child) DocumentId, SchemaTypeId and
+ // JoinablePropertyId.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
+ // document_id is invalid
+ // - Any KeyMapper/FlashIndexStorage errors
+ libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) override;
+
+ // Returns a JoinDataIterator for iterating through all join data of the
+ // specified (schema_type_id, joinable_property_id).
+ //
+ // Returns:
+ // - On success: a JoinDataIterator
+ // - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
+ // invalid
+ // - Any KeyMapper/FlashIndexStorage errors
+ libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+ GetIterator(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const override;
+
+  // Reduces internal file sizes by reclaiming the space and ids of deleted
+  // documents. The qualified id join index converts all entries to the new
+  // document ids and namespace ids.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - namespace_id_old_to_new: a map for converting old namespace id to new
+ // namespace id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the qualified id join index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+ // an invalid state and the caller should handle it properly (e.g. discard
+ // and rebuild)
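+  //
+  // Example (illustrative): if documents {0, 1, 2, 3} existed and document 1
+  // was deleted, document_id_old_to_new would be {0, kInvalidDocumentId, 1,
+  // 2}; entries mapping to kInvalidDocumentId are dropped from the index.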
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+  // Clears all data and sets last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Clear() override;
+
+ bool is_v2() const override { return true; }
+
+ int32_t size() const override { return info().num_data; }
+
+ bool empty() const override { return size() == 0; }
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
+
+ private:
+ explicit QualifiedIdJoinIndexImplV2(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<uint8_t[]> metadata_buffer,
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper,
+ std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>>
+ posting_list_serializer,
+ std::unique_ptr<FlashIndexStorage> flash_index_storage,
+ bool pre_mapping_fbv)
+ : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
+ metadata_buffer_(std::move(metadata_buffer)),
+ schema_joinable_id_to_posting_list_mapper_(
+ std::move(schema_joinable_id_to_posting_list_mapper)),
+ posting_list_serializer_(std::move(posting_list_serializer)),
+ flash_index_storage_(std::move(flash_index_storage)),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ bool pre_mapping_fbv);
+
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path, bool pre_mapping_fbv);
+
+  // Transfers qualified id join index data from the current index to
+  // new_index, converting entries to new document ids and namespace ids
+  // according to document_id_old_to_new and namespace_id_old_to_new. It is a
+  // helper function for Optimize.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ QualifiedIdJoinIndexImplV2* new_index) const;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+ // Computes and returns all storages checksum.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
+
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+ kCrcsMetadataBufferOffset);
+ }
+
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+ kInfoMetadataBufferOffset);
+ }
+
+ void SetInfoDirty() { is_info_dirty_ = true; }
+  // When the storage is dirty, the info must be marked dirty as well, so
+  // SetDirty sets both flags.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
+ // Metadata buffer
+ std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+ // Persistent KeyMapper for mapping (schema_type_id, joinable_property_id) to
+ // PostingListIdentifier.
+ std::unique_ptr<KeyMapper<PostingListIdentifier>>
+ schema_joinable_id_to_posting_list_mapper_;
+
+ // Posting list related members. Use posting list to store join data
+ // (document id to referenced NamespaceFingerprintIdentifier).
+ std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>>
+ posting_list_serializer_;
+ std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+
+ // TODO(b/268521214): add delete propagation storage
+
+  // Flag indicating whether to memory-map the maximum possible file size for
+  // the underlying FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
diff --git a/icing/join/qualified-id-join-index-impl-v2_test.cc b/icing/join/qualified-id-join-index-impl-v2_test.cc
new file mode 100644
index 0000000..d73d6c2
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2_test.cc
@@ -0,0 +1,1414 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = QualifiedIdJoinIndexImplV2::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+
+class QualifiedIdJoinIndexImplV2Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/qualified_id_join_index_impl_v2_test";
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
+};
+
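+// Drains the iterator returned by index.GetIterator() and returns all join
+// data in iteration order.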
+libtextclassifier3::StatusOr<
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
+GetJoinData(const QualifiedIdJoinIndexImplV2& index,
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
+ index.GetIterator(schema_type_id, joinable_property_id));
+
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->GetCurrent());
+ }
+
+ return result;
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidWorkingPath) {
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(
+ filesystem_, "/dev/null/qualified_id_join_index_impl_v2_test",
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InitializeNewFiles) {
+ {
+ // Create new qualified id join index
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Metadata file should be initialized correctly for both info and crcs
+ // sections.
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+ ASSERT_THAT(
+ filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Check info section
+ const Info* info = reinterpret_cast<const Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+ EXPECT_THAT(info->magic, Eq(Info::kMagic));
+ EXPECT_THAT(info->num_data, Eq(0));
+ EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
+
+ // Check crcs section
+ const Crcs* crcs = reinterpret_cast<const Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+  // The KeyMapper contains some initial data, so storages_crc should be
+  // non-zero.
+ EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
+ EXPECT_THAT(crcs->component_crcs.info_crc,
+ Eq(Crc32(std::string_view(reinterpret_cast<const char*>(info),
+ sizeof(Info)))
+ .Get()));
+ EXPECT_THAT(crcs->all_crc,
+ Eq(Crc32(std::string_view(
+ reinterpret_cast<const char*>(&crcs->component_crcs),
+ sizeof(Crcs::ComponentCrcs)))
+ .Get()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ // Insert some data.
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}));
+
+ // Without calling PersistToDisk, checksums will not be recomputed or synced
+ // to disk, so initializing another instance on the same files should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializationShouldSucceedWithPersistToDisk) {
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index1,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ // Insert some data.
+ ICING_ASSERT_OK(index1->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+ ICING_ASSERT_OK(index1->Put(
+ /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}));
+ ICING_ASSERT_OK(index1->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}));
+ ASSERT_THAT(index1, Pointee(SizeIs(4)));
+
+ // After calling PersistToDisk, all checksums should be recomputed and synced
+ // correctly to disk, so initializing another instance on the same files
+ // should succeed, and we should be able to get the same contents.
+ ICING_EXPECT_OK(index1->PersistToDisk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index2,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index2, Pointee(SizeIs(4)));
+ EXPECT_THAT(
+ GetJoinData(*index2, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id1))));
+ EXPECT_THAT(
+ GetJoinData(*index2, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, /*join_info=*/id3))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializationShouldSucceedAfterDestruction) {
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ // Insert some data.
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}));
+ ASSERT_THAT(index, Pointee(SizeIs(4)));
+ }
+
+ {
+    // The previous instance went out of scope and was destroyed. Although we
+    // didn't call PersistToDisk explicitly, the destructor should invoke it,
+    // so initializing another instance on the same files should succeed and
+    // return the same contents.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id1))));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, /*join_info=*/id3))));
+ }
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/12)}));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually change magic and update checksum
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+ info->magic += kCorruptedValueOffset;
+ crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+ crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with different magic. This
+ // should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Incorrect magic value")));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializeExistingFilesWithWrongAllCrcShouldFail) {
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/12)}));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+ // Manually corrupt all_crc
+ Crcs* crcs = reinterpret_cast<Crcs*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+ crcs->all_crc += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with metadata containing
+ // corrupted all_crc. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid all crc")));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/12)}));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ {
+ const std::string metadata_file_path =
+ absl_ports::StrCat(working_path_, "/metadata");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+ auto metadata_buffer = std::make_unique<uint8_t[]>(
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+ ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+ /*offset=*/0),
+ IsTrue());
+
+    // Modify info without updating its checksum. This simulates corruption of
+    // the info section.
+ Info* info = reinterpret_cast<Info*>(
+ metadata_buffer.get() +
+ QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+ info->last_added_document_id += kCorruptedValueOffset;
+
+ ASSERT_THAT(filesystem_.PWrite(
+ metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+ QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+ IsTrue());
+ }
+
+ // Attempt to create the qualified id join index with info that doesn't match
+ // its checksum. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid info crc")));
+}
+
+TEST_F(
+ QualifiedIdJoinIndexImplV2Test,
+ InitializeExistingFilesWithCorruptedSchemaJoinableIdToPostingListMapperShouldFail) {
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/
+ {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+ /*fingerprint=*/12)}));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Corrupt schema_joinable_id_to_posting_list_mapper manually.
+ {
+ std::string mapper_working_path = absl_ports::StrCat(
+ working_path_, "/schema_joinable_id_to_posting_list_mapper");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<PostingListIdentifier>> mapper,
+ PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+ filesystem_, std::move(mapper_working_path),
+ /*pre_mapping_fbv=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
+ ICING_ASSERT_OK(mapper->Put("foo", PostingListIdentifier::kInvalid));
+ ICING_ASSERT_OK(mapper->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, mapper->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
+
+  // Attempt to create the qualified id join index with a corrupted
+  // schema_joinable_id_to_posting_list_mapper. This should fail.
+ EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ HasSubstr("Invalid storages crc")));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidPut) {
+ NamespaceFingerprintIdentifier id(/*namespace_id=*/1, /*fingerprint=*/12);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ EXPECT_THAT(
+ index->Put(/*schema_type_id=*/-1, /*joinable_property_id=*/1,
+ /*document_id=*/5, /*ref_namespace_fingerprint_ids=*/{id}),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ index->Put(/*schema_type_id=*/2, /*joinable_property_id=*/-1,
+ /*document_id=*/5, /*ref_namespace_fingerprint_ids=*/{id}),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(index->Put(/*schema_type_id=*/2, /*joinable_property_id=*/1,
+ /*document_id=*/kInvalidDocumentId,
+ /*ref_namespace_fingerprint_ids=*/{id}),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidGetIterator) {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ EXPECT_THAT(
+ index->GetIterator(/*schema_type_id=*/-1, /*joinable_property_id=*/1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ index->GetIterator(/*schema_type_id=*/2, /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ PutEmptyRefNamespaceFingerprintIdsShouldReturnOk) {
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{}),
+ IsOk());
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ PutAndGetSingleSchemaTypeAndJoinableProperty) {
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/78);
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, /*join_info=*/id3),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id1),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id2))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+ IsOkAndHolds(IsEmpty()));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Verify we can get all of them after destructing and re-initializing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/6, /*join_info=*/id3),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id1),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id2))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+ PutAndGetMultipleSchemaTypesAndJoinableProperties) {
+ SchemaTypeId schema_type_id1 = 2;
+ SchemaTypeId schema_type_id2 = 4;
+
+ JoinablePropertyId joinable_property_id1 = 1;
+ JoinablePropertyId joinable_property_id2 = 10;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/78);
+
+ {
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ EXPECT_THAT(
+ index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id1}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id1, joinable_property_id2, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id2, joinable_property_id1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+
+ EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id1),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id1))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id2),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id2))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id1),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id3))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id2),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id4))));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
+
+ // Verify we can get all of them after destructing and re-initializing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id1),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id1))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id2),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/5, /*join_info=*/id2))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id1),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id3))));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id2),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/12, /*join_info=*/id4))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, SetLastAddedDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ constexpr DocumentId kDocumentId = 100;
+ index->set_last_added_document_id(kDocumentId);
+ EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 123;
+ index->set_last_added_document_id(kNextDocumentId);
+ EXPECT_THAT(index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TEST_F(
+ QualifiedIdJoinIndexImplV2Test,
+ SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ constexpr DocumentId kDocumentId = 123;
+ index->set_last_added_document_id(kDocumentId);
+ ASSERT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+ constexpr DocumentId kNextDocumentId = 100;
+ ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+ index->set_last_added_document_id(kNextDocumentId);
+ // last_added_document_id() should remain unchanged.
+ EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, Optimize) {
+ // General test for Optimize().
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id1 = 2;
+ SchemaTypeId schema_type_id2 = 5;
+
+ JoinablePropertyId joinable_property_id1 = 11;
+ JoinablePropertyId joinable_property_id2 = 15;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/2, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/3, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/4, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/104);
+ NamespaceFingerprintIdentifier id5(/*namespace_id=*/0, /*fingerprint=*/105);
+ NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+ NamespaceFingerprintIdentifier id7(/*namespace_id=*/3, /*fingerprint=*/107);
+ NamespaceFingerprintIdentifier id8(/*namespace_id=*/2, /*fingerprint=*/108);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/3,
+ /*ref_namespace_fingerprint_ids=*/{id1, id2, id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/8,
+ /*ref_namespace_fingerprint_ids=*/{id5, id6}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/13,
+ /*ref_namespace_fingerprint_ids=*/{id7}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/21,
+ /*ref_namespace_fingerprint_ids=*/{id8}),
+ IsOk());
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(8)));
+
+ // Delete doc id = 5, 13, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[8] = 1;
+ document_id_old_to_new[21] = 2;
+
+ // Delete namespace id 1, 2 (and invalidate id1, id6, id8). Reorder namespace
+ // ids [0, 3, 4] to [1, 2, 0].
+ std::vector<NamespaceId> namespace_id_old_to_new(5, kInvalidNamespaceId);
+ namespace_id_old_to_new[0] = 1;
+ namespace_id_old_to_new[3] = 2;
+ namespace_id_old_to_new[4] = 0;
+
+ DocumentId new_last_added_document_id = 2;
+ EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(3)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify that the GetIterator API works normally after Optimize().
+ // 1) schema_type_id1, joinable_property_id1:
+ // - old_doc_id=21, old_ref_namespace_id=2: NOT FOUND
+ // - old_doc_id=13, old_ref_namespace_id=3: NOT FOUND
+ // - old_doc_id=3, old_ref_namespace_id=4:
+ // become new_doc_id=0, new_ref_namespace_id=0
+ // - old_doc_id=3, old_ref_namespace_id=3:
+ // become new_doc_id=0, new_ref_namespace_id=2
+ // - old_doc_id=3, old_ref_namespace_id=2: NOT FOUND
+ //
+ // For new_doc_id=0, entries should be reordered due to the posting list restriction.
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id1, joinable_property_id1),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/102)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/103)))));
+
+ // 2) schema_type_id2, joinable_property_id2:
+ // - old_doc_id=8, old_ref_namespace_id=1: NOT FOUND
+ // - old_doc_id=8, old_ref_namespace_id=0:
+ // become new_doc_id=1, new_ref_namespace_id=1
+ // - old_doc_id=5, old_ref_namespace_id=0: NOT FOUND
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id2, joinable_property_id2),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/105)))));
+
+ // Verify that the Put API works normally after Optimize().
+ NamespaceFingerprintIdentifier id9(/*namespace_id=*/1, /*fingerprint=*/109);
+ EXPECT_THAT(
+ index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/99,
+ /*ref_namespace_fingerprint_ids=*/{id9}),
+ IsOk());
+ index->set_last_added_document_id(99);
+
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(99));
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id1, joinable_property_id1),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/99, /*join_info=*/id9),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/102)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/103)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDocumentIdChange) {
+ // Specific test for Optimize(): document id compaction.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+ NamespaceFingerprintIdentifier id5(/*namespace_id=*/1, /*fingerprint=*/105);
+ NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+ /*ref_namespace_fingerprint_ids=*/{id1, id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+ /*ref_namespace_fingerprint_ids=*/{id5}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+ /*ref_namespace_fingerprint_ids=*/{id6}),
+ IsOk());
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+ // Delete doc id = 5, 8, compress and keep the rest.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+ document_id_old_to_new[3] = 0;
+ document_id_old_to_new[13] = 1;
+ document_id_old_to_new[21] = 2;
+
+ // No change for namespace id.
+ std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+ DocumentId new_last_added_document_id = 2;
+ EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify that the GetIterator API works normally after Optimize().
+ // - old_doc_id=21, join_info=id6: become doc_id=2, join_info=id6
+ // - old_doc_id=13, join_info=id5: become doc_id=1, join_info=id5
+ // - old_doc_id=8, join_info=id4: NOT FOUND
+ // - old_doc_id=5, join_info=id3: NOT FOUND
+ // - old_doc_id=3, join_info=id2: become doc_id=0, join_info=id2
+ // - old_doc_id=3, join_info=id1: become doc_id=0, join_info=id1
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, /*join_info=*/id6),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, /*join_info=*/id5),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/id1))));
+
+ // Verify that the Put API works normally after Optimize().
+ NamespaceFingerprintIdentifier id7(/*namespace_id=*/1, /*fingerprint=*/107);
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+ /*ref_namespace_fingerprint_ids=*/{id7}),
+ IsOk());
+ index->set_last_added_document_id(99);
+
+ EXPECT_THAT(index, Pointee(SizeIs(5)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(99));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/99, /*join_info=*/id7),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, /*join_info=*/id6),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, /*join_info=*/id5),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/id1))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeOutOfRangeDocumentId) {
+ // Specific test for Optimize() for out of range document id.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+ NamespaceFingerprintIdentifier id(/*namespace_id=*/1, /*fingerprint=*/101);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+ /*ref_namespace_fingerprint_ids=*/{id}),
+ IsOk());
+ index->set_last_added_document_id(99);
+
+ // Create document_id_old_to_new with size = 1. Optimize should handle out of
+ // range DocumentId properly.
+ std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId};
+ std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+ // There shouldn't be any error caused by out-of-range vector access.
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDeleteAllDocuments) {
+ // Specific test for Optimize(): delete all document ids.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+ NamespaceFingerprintIdentifier id5(/*namespace_id=*/1, /*fingerprint=*/105);
+ NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+ /*ref_namespace_fingerprint_ids=*/{id1, id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+ /*ref_namespace_fingerprint_ids=*/{id5}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+ /*ref_namespace_fingerprint_ids=*/{id6}),
+ IsOk());
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+ // Delete all documents.
+ std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+
+ // No change for namespace id.
+ std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeNamespaceIdChange) {
+ // Specific test for Optimize(): referenced namespace id compaction.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/5, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/4, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/104);
+ NamespaceFingerprintIdentifier id5(/*namespace_id=*/2, /*fingerprint=*/105);
+ NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/2,
+ /*ref_namespace_fingerprint_ids=*/{id1}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+ /*ref_namespace_fingerprint_ids=*/{id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+ /*ref_namespace_fingerprint_ids=*/{id5}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+ /*ref_namespace_fingerprint_ids=*/{id6}),
+ IsOk());
+ index->set_last_added_document_id(21);
+
+ ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+ // No change for document id.
+ std::vector<DocumentId> document_id_old_to_new(22);
+ std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+ // Delete namespace id 2, 4. Reorder namespace id [0, 1, 3, 5] to [2, 3, 1,
+ // 0].
+ std::vector<NamespaceId> namespace_id_old_to_new(6, kInvalidNamespaceId);
+ namespace_id_old_to_new[0] = 2;
+ namespace_id_old_to_new[1] = 3;
+ namespace_id_old_to_new[3] = 1;
+ namespace_id_old_to_new[5] = 0;
+
+ DocumentId new_last_added_document_id = 21;
+ EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify that the GetIterator API works normally after Optimize().
+ // - id6 (old_namespace_id=1): new_namespace_id=3 (document_id = 21)
+ // - id5 (old_namespace_id=2): NOT FOUND
+ // - id4 (old_namespace_id=0): new_namespace_id=2 (document_id = 8)
+ // - id3 (old_namespace_id=4): NOT FOUND
+ // - id2 (old_namespace_id=5): new_namespace_id=0 (document_id = 3)
+ // - id1 (old_namespace_id=3): new_namespace_id=1 (document_id = 2)
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/21, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/3, /*fingerprint=*/106)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/8, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/104)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/102)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/101)))));
+
+ // Verify that the Put API works normally after Optimize().
+ NamespaceFingerprintIdentifier id7(/*namespace_id=*/1, /*fingerprint=*/107);
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+ /*ref_namespace_fingerprint_ids=*/{id7}),
+ IsOk());
+ index->set_last_added_document_id(99);
+
+ EXPECT_THAT(index, Pointee(SizeIs(5)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(99));
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/99, /*join_info=*/id7),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/21, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/3, /*fingerprint=*/106)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/8, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/104)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/3, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/102)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/2, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/101)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeNamespaceIdChangeShouldReorder) {
+ // Specific test for Optimize(): referenced namespace id reorder.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/0, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/103);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/{id1, id2, id3}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/1,
+ /*ref_namespace_fingerprint_ids=*/{id4}),
+ IsOk());
+ index->set_last_added_document_id(1);
+
+ ASSERT_THAT(index, Pointee(SizeIs(4)));
+
+ // No change for document id.
+ std::vector<DocumentId> document_id_old_to_new = {0, 1};
+
+ // Reorder namespace id [0, 1, 2] to [2, 0, 1].
+ std::vector<NamespaceId> namespace_id_old_to_new = {2, 0, 1};
+
+ DocumentId new_last_added_document_id = 1;
+ EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ new_last_added_document_id),
+ IsOk());
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+ // Verify that the GetIterator API works normally after Optimize().
+ // - id4 (old_namespace_id=1): new_namespace_id=0 (document_id = 1)
+ // - id3 (old_namespace_id=2): new_namespace_id=1 (document_id = 0)
+ // - id2 (old_namespace_id=1): new_namespace_id=0 (document_id = 0)
+ // - id1 (old_namespace_id=0): new_namespace_id=2 (document_id = 0)
+ //
+ // Entries should be reordered to [id4, id1, id3, id2] due to the posting list restriction.
+ EXPECT_THAT(
+ GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(ElementsAre(
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/1, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/104)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/2, /*fingerprint=*/101)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/1, /*fingerprint=*/103)),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+ /*namespace_id=*/0, /*fingerprint=*/102)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeOutOfRangeNamespaceId) {
+ // Specific test for Optimize(): out of range referenced namespace id.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+ NamespaceFingerprintIdentifier id(/*namespace_id=*/99, /*fingerprint=*/101);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/{id}),
+ IsOk());
+ index->set_last_added_document_id(0);
+
+ // Create namespace_id_old_to_new with size = 1. Optimize should handle out of
+ // range NamespaceId properly.
+ std::vector<DocumentId> document_id_old_to_new = {0};
+ std::vector<NamespaceId> namespace_id_old_to_new = {kInvalidNamespaceId};
+
+ // There shouldn't be any error caused by out-of-range vector access.
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDeleteAllNamespaces) {
+ // Specific test for Optimize(): delete all referenced namespace ids.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+
+ SchemaTypeId schema_type_id = 2;
+ JoinablePropertyId joinable_property_id = 1;
+
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/0, /*fingerprint=*/101);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/103);
+
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+ /*ref_namespace_fingerprint_ids=*/{id1}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/1,
+ /*ref_namespace_fingerprint_ids=*/{id2}),
+ IsOk());
+ EXPECT_THAT(
+ index->Put(schema_type_id, joinable_property_id, /*document_id=*/2,
+ /*ref_namespace_fingerprint_ids=*/{id3}),
+ IsOk());
+ index->set_last_added_document_id(3);
+
+ ASSERT_THAT(index, Pointee(SizeIs(3)));
+
+ // No change for document id.
+ std::vector<DocumentId> document_id_old_to_new = {0, 1, 2};
+
+ // Delete all namespaces.
+ std::vector<NamespaceId> namespace_id_old_to_new(3, kInvalidNamespaceId);
+
+ EXPECT_THAT(
+ index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ /*new_last_added_document_id=*/kInvalidDocumentId),
+ IsOk());
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // Verify all data are discarded after Optimize().
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, Clear) {
+ NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+ NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+ NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+ NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+ // Create new qualified id join index
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+ QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ // Insert some data.
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+ /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+ /*ref_namespace_fingerprint_ids=*/{id3}));
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+ /*ref_namespace_fingerprint_ids=*/{id4}));
+ ASSERT_THAT(index, Pointee(SizeIs(4)));
+ index->set_last_added_document_id(12);
+ ASSERT_THAT(index->last_added_document_id(), Eq(12));
+
+ // After Clear(), last_added_document_id should be set to kInvalidDocumentId,
+ // and the previously added data should be deleted.
+ EXPECT_THAT(index->Clear(), IsOk());
+ EXPECT_THAT(index, Pointee(IsEmpty()));
+ EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+ IsOkAndHolds(IsEmpty()));
+
+ // The join index should work normally after Clear().
+ ICING_ASSERT_OK(index->Put(
+ /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/20,
+ /*ref_namespace_fingerprint_ids=*/{id4, id2, id1, id3}));
+ index->set_last_added_document_id(20);
+
+ EXPECT_THAT(index, Pointee(SizeIs(4)));
+ EXPECT_THAT(index->last_added_document_id(), Eq(20));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id3),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id1))));
+
+ ICING_ASSERT_OK(index->PersistToDisk());
+ index.reset();
+
+ // Verify index after reconstructing.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index, QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+ /*pre_mapping_fbv=*/false));
+ EXPECT_THAT(index->last_added_document_id(), Eq(20));
+ EXPECT_THAT(
+ GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id4),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id3),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id2),
+ DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/20, /*join_info=*/id1))));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-index.h b/icing/join/qualified-id-join-index.h
new file mode 100644
index 0000000..4e487f9
--- /dev/null
+++ b/icing/join/qualified-id-join-index.h
@@ -0,0 +1,187 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndex: an abstract class to maintain data for qualified id
+// joining.
+class QualifiedIdJoinIndex : public PersistentStorage {
+ public:
+ class JoinDataIteratorBase {
+ public:
+ virtual ~JoinDataIteratorBase() = default;
+
+ virtual libtextclassifier3::Status Advance() = 0;
+
+ virtual const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>&
+ GetCurrent() const = 0;
+ };
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+
+ // Deletes QualifiedIdJoinIndex under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ virtual ~QualifiedIdJoinIndex() override = default;
+
+ // (v1 only) Puts new data into the index: DocJoinInfo (DocumentId,
+ // JoinablePropertyId) referencing ref_qualified_id_str (the identifier of
+ // another document).
+ //
+ // REQUIRES: ref_qualified_id_str contains no '\0'.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - Any KeyMapper errors
+ virtual libtextclassifier3::Status Put(
+ const DocJoinInfo& doc_join_info,
+ std::string_view ref_qualified_id_str) = 0;
+
+ // (v2 only) Puts a list of referenced NamespaceFingerprintIdentifiers into
+ // the index, given the DocumentId, SchemaTypeId and JoinablePropertyId.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
+ // document_id is invalid
+ // - Any KeyMapper/FlashIndexStorage errors
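+ //
+ // A minimal usage sketch (hypothetical caller code; the ids are
+ // illustrative):
+ //
+ //   NamespaceFingerprintIdentifier ref_id(/*namespace_id=*/1,
+ //                                         /*fingerprint=*/101);
+ //   ICING_RETURN_IF_ERROR(index->Put(schema_type_id, joinable_property_id,
+ //                                    document_id,
+ //                                    /*ref_namespace_fingerprint_ids=*/
+ //                                    {ref_id}));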
+ virtual libtextclassifier3::Status Put(
+ SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
+ DocumentId document_id,
+ std::vector<NamespaceFingerprintIdentifier>&&
+ ref_namespace_fingerprint_ids) = 0;
+
+ // (v1 only) Gets the referenced document's qualified id string by
+ // DocJoinInfo.
+ //
+ // Returns:
+ // - A qualified id string referenced by the given DocJoinInfo (DocumentId,
+ // JoinablePropertyId) on success
+ // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+ // - NOT_FOUND_ERROR if doc_join_info doesn't exist
+ // - Any KeyMapper errors
+ virtual libtextclassifier3::StatusOr<std::string_view> Get(
+ const DocJoinInfo& doc_join_info) const = 0;
+
+ // (v2 only) Returns a JoinDataIterator for iterating through all join data of
+ // the specified (schema_type_id, joinable_property_id).
+ //
+ // Returns:
+ // - On success: a JoinDataIterator
+ // - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
+ // invalid
+ // - Any KeyMapper/FlashIndexStorage errors
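+ //
+ // A minimal iteration sketch (hypothetical caller code; it assumes that
+ // Advance() returns a non-OK status once the join data is exhausted):
+ //
+ //   ICING_ASSIGN_OR_RETURN(
+ //       std::unique_ptr<JoinDataIteratorBase> itr,
+ //       index->GetIterator(schema_type_id, joinable_property_id));
+ //   while (itr->Advance().ok()) {
+ //     // Consume the document id and join info of itr->GetCurrent() here.
+ //   }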
+ virtual libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+ GetIterator(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const = 0;
+
+ // Reduces internal file sizes by reclaiming the space and ids of deleted
+ // documents. The qualified id join index will remap all entries to the new
+ // document ids.
+ //
+ // - document_id_old_to_new: a map for converting old document ids to new
+ //   document ids.
+ // - namespace_id_old_to_new: a map for converting old namespace ids to new
+ //   namespace ids.
+ // - new_last_added_document_id: will be used to update the last added
+ //   document id in the qualified id join index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+ // an invalid state and the caller should handle it properly (e.g. discard
+ // and rebuild)
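+ //
+ // A minimal sketch of building the maps (hypothetical caller code; the
+ // surviving ids are illustrative). Deleting document 1 out of documents
+ // [0, 1, 2] compacts the ids of the survivors:
+ //
+ //   std::vector<DocumentId> document_id_old_to_new = {0, kInvalidDocumentId,
+ //                                                     1};
+ //   // Namespace ids are unchanged in this sketch.
+ //   std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+ //   ICING_RETURN_IF_ERROR(
+ //       index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+ //                       /*new_last_added_document_id=*/1));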
+ virtual libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const std::vector<NamespaceId>& namespace_id_old_to_new,
+ DocumentId new_last_added_document_id) = 0;
+
+ // Clears all data and sets last_added_document_id to kInvalidDocumentId.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ virtual libtextclassifier3::Status Clear() = 0;
+
+ virtual bool is_v2() const = 0;
+
+ virtual int32_t size() const = 0;
+
+ virtual bool empty() const = 0;
+
+ virtual DocumentId last_added_document_id() const = 0;
+
+ virtual void set_last_added_document_id(DocumentId document_id) = 0;
+
+ protected:
+ explicit QualifiedIdJoinIndex(const Filesystem& filesystem,
+ std::string&& working_path)
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType) {}
+
+ virtual libtextclassifier3::Status PersistStoragesToDisk(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::Status PersistMetadataToDisk(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override = 0;
+
+ virtual Crcs& crcs() override = 0;
+ virtual const Crcs& crcs() const override = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
diff --git a/icing/join/qualified-id-join-indexing-handler-v1_test.cc b/icing/join/qualified-id-join-indexing-handler-v1_test.cc
new file mode 100644
index 0000000..9700132
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler-v1_test.cc
@@ -0,0 +1,558 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/join/qualified-id.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Schema type for referenced documents: ReferencedType
+static constexpr std::string_view kReferencedType = "ReferencedType";
+static constexpr std::string_view kPropertyName = "name";
+
+// Joinable properties and joinable property ids. Joinable property ids are
+// determined by the lexicographical order of the joinable property paths.
+// Schema type with joinable property: FakeType
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
+
+static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
+
+// Schema type with nested joinable properties: NestedType
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
+
+static constexpr DocumentId kDefaultDocumentId = 3;
+
+// TODO(b/275121148): remove this test after deprecating
+// QualifiedIdJoinIndexImplV1.
+class QualifiedIdJoinIndexingHandlerV1Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdJoinIndexImplV1::Create(
+ filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kReferencedType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ doc_store_.reset();
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+
+ std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+};
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ /*clock=*/nullptr, doc_store_.get(), qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, /*doc_store=*/nullptr, qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, doc_store_.get(), /*qualified_id_join_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleJoinableProperty) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleNestedJoinableProperty) {
+ DocumentProto referenced_document1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ DocumentProto referenced_document2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/2")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "two")
+ .Build();
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/2")
+ .Build())
+ .AddStringProperty(std::string(kPropertyQualifiedId2),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ nested_document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle nested_document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/2"));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleShouldSkipInvalidFormatQualifiedId) {
+ static constexpr std::string_view kInvalidFormatQualifiedId =
+ "invalid_format_qualified_id";
+ ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ std::string(kInvalidFormatQualifiedId))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle the document. The invalid format qualified id should be ignored:
+ // index data should remain unchanged since there is no valid qualified id,
+ // but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleShouldSkipEmptyQualifiedId) {
+ // Create a document without any qualified id.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ // Handle document. Index data should remain unchanged since there is no
+ // qualified id, but last_added_document_id should be updated.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handling document with kInvalidDocumentId should cause a failure, and both
+ // index data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Recovery mode should get the same result.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure, and both index data and last_added_document_id should remain
+ // unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+ HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handle document with document_id == last_added_document_id in recovery
+ // mode. We should not get any error, but the handler should ignore the
+ // document, so both index data and last_added_document_id should remain
+ // unchanged.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue());
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kDefaultDocumentId + 1,
+ /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kDefaultDocumentId + 1));
+ EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+ kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)),
+ IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-indexing-handler.cc b/icing/join/qualified-id-join-indexing-handler.cc
new file mode 100644
index 0000000..df86cba
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler.cc
@@ -0,0 +1,179 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-indexing-handler.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>>
+QualifiedIdJoinIndexingHandler::Create(
+ const Clock* clock, const DocumentStore* doc_store,
+ QualifiedIdJoinIndex* qualified_id_join_index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(doc_store);
+ ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
+
+ return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
+ new QualifiedIdJoinIndexingHandler(clock, doc_store,
+ qualified_id_join_index));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
+
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(
+ IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id));
+ }
+
+ if (qualified_id_join_index_.last_added_document_id() != kInvalidDocumentId &&
+ document_id <= qualified_id_join_index_.last_added_document_id()) {
+ if (recovery_mode) {
+ // Skip the document if document_id <= last_added_document_id in recovery
+ // mode without returning an error.
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, qualified_id_join_index_.last_added_document_id()));
+ }
+ qualified_id_join_index_.set_last_added_document_id(document_id);
+
+ if (qualified_id_join_index_.is_v2()) {
+ // v2
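+    // Note: current_time_ms is passed as INT64_MIN below, presumably so the
+    // expiry check in GetAliveDocumentFilterData can never filter out the
+    // document being indexed.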
+ std::optional<DocumentFilterData> filter_data =
+ doc_store_.GetAliveDocumentFilterData(
+ document_id,
+ /*current_time_ms=*/std::numeric_limits<int64_t>::min());
+ if (!filter_data) {
+ // This should not happen.
+ return absl_ports::InternalError(
+ "Failed to get alive document filter data when indexing");
+ }
+
+ for (const JoinableProperty<std::string_view>& qualified_id_property :
+ tokenized_document.qualified_id_join_properties()) {
+ // Parse all qualified id strings and convert them to
+ // NamespaceFingerprintIdentifier.
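+      // For example, "pkg$db/ns#ref_type/1" maps to the NamespaceId of
+      // "pkg$db/ns" combined with a fingerprint of the uri "ref_type/1".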
+ std::vector<NamespaceFingerprintIdentifier> ref_doc_ns_fingerprint_ids;
+ for (std::string_view ref_qualified_id_str :
+ qualified_id_property.values) {
+ // Attempt to parse qualified id string to make sure the format is
+ // correct.
+ auto ref_qualified_id_or = QualifiedId::Parse(ref_qualified_id_str);
+ if (!ref_qualified_id_or.ok()) {
+          // Skip qualified id strings with an incorrect format.
+ continue;
+ }
+
+ QualifiedId ref_qualified_id =
+ std::move(ref_qualified_id_or).ValueOrDie();
+ auto ref_namespace_id_or =
+ doc_store_.GetNamespaceId(ref_qualified_id.name_space());
+ if (!ref_namespace_id_or.ok()) {
+ // Skip invalid namespace id.
+ continue;
+ }
+ NamespaceId ref_namespace_id =
+ std::move(ref_namespace_id_or).ValueOrDie();
+
+ ref_doc_ns_fingerprint_ids.push_back(NamespaceFingerprintIdentifier(
+ ref_namespace_id, ref_qualified_id.uri()));
+ }
+
+ // Batch add all join data of this (schema_type_id, joinable_property_id)
+      // into the index.
+ libtextclassifier3::Status status = qualified_id_join_index_.Put(
+ filter_data->schema_type_id(), qualified_id_property.metadata.id,
+ document_id, std::move(ref_doc_ns_fingerprint_ids));
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to add data into qualified id join index v2 due to: "
+ << status.error_message();
+ return status;
+ }
+ }
+ } else {
+ // v1
+ // TODO(b/275121148): deprecate this part after rollout v2.
+ for (const JoinableProperty<std::string_view>& qualified_id_property :
+ tokenized_document.qualified_id_join_properties()) {
+ if (qualified_id_property.values.empty()) {
+ continue;
+ }
+
+ DocJoinInfo info(document_id, qualified_id_property.metadata.id);
+ // Currently we only support single (non-repeated) joinable value under a
+ // property.
+ std::string_view ref_qualified_id_str = qualified_id_property.values[0];
+
+ // Attempt to parse qualified id string to make sure the format is
+ // correct.
+ if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
+        // Skip qualified id strings with an incorrect format to save disk
+        // space.
+ continue;
+ }
+
+ libtextclassifier3::Status status =
+ qualified_id_join_index_.Put(info, ref_qualified_id_str);
+ if (!status.ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to add data into qualified id join index due to: "
+ << status.error_message();
+ return status;
+ }
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_qualified_id_join_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id-join-indexing-handler.h b/icing/join/qualified-id-join-indexing-handler.h
new file mode 100644
index 0000000..8a11bf9
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler.h
@@ -0,0 +1,78 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
+
+#include <memory>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/util/clock.h"
+#include "icing/util/tokenized-document.h"
+
+namespace icing {
+namespace lib {
+
+class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
+ public:
+ // Creates a QualifiedIdJoinIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created QualifiedIdJoinIndexingHandler instance.
+ //
+ // Returns:
+ // - A QualifiedIdJoinIndexingHandler instance on success
+  //   - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>>
+ Create(const Clock* clock, const DocumentStore* doc_store,
+ QualifiedIdJoinIndex* qualified_id_join_index);
+
+ ~QualifiedIdJoinIndexingHandler() override = default;
+
+ // Handles the joinable qualified id data indexing process: add data into the
+ // qualified id join index.
+ //
+  // Returns:
+  //   - OK on success.
+  //   - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is
+  //     less than or equal to the document_id of a previously indexed document
+  //     in non-recovery mode.
+ // - INTERNAL_ERROR if any other errors occur.
+ // - Any QualifiedIdJoinIndex errors.
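+  //
+  // Example call (mirroring the unit tests):
+  //   handler->Handle(tokenized_document, document_id,
+  //                   /*recovery_mode=*/false, /*put_document_stats=*/nullptr);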
+ libtextclassifier3::Status Handle(
+ const TokenizedDocument& tokenized_document, DocumentId document_id,
+ bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
+
+ private:
+ explicit QualifiedIdJoinIndexingHandler(
+ const Clock* clock, const DocumentStore* doc_store,
+ QualifiedIdJoinIndex* qualified_id_join_index)
+ : DataIndexingHandler(clock),
+ doc_store_(*doc_store),
+ qualified_id_join_index_(*qualified_id_join_index) {}
+
+ const DocumentStore& doc_store_; // Does not own.
+ QualifiedIdJoinIndex& qualified_id_join_index_; // Does not own.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
diff --git a/icing/join/qualified-id-join-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc
new file mode 100644
index 0000000..53d35c7
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler_test.cc
@@ -0,0 +1,829 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-indexing-handler.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::NotNull;
+
+// Schema type for referenced documents: ReferencedType
+static constexpr std::string_view kReferencedType = "ReferencedType";
+static constexpr std::string_view kPropertyName = "name";
+
+// Joinable properties and joinable property ids. Joinable property ids are
+// assigned in the lexicographical order of the joinable property paths.
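+// For example, in NestedType the path "nested.qualifiedId" sorts before
+// "qualifiedId2", so it is assigned the smaller joinable property id.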
+// Schema type with joinable property: FakeType
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
+
+// Schema type with nested joinable properties: NestedType
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
+
+class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ base_dir_ = GetTestTempDir() + "/icing_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+ doc_store_dir_ = base_dir_ + "/doc_store";
+
+ ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+ QualifiedIdJoinIndexImplV2::Create(
+ filesystem_, qualified_id_join_index_dir_,
+ /*pre_mapping_fbv=*/false));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kReferencedType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyNestedDoc)
+ .SetDataTypeDocument(
+ kFakeType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+ IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/true,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ doc_store_ = std::move(create_result.document_store);
+
+ // Get FakeType related ids.
+ ICING_ASSERT_OK_AND_ASSIGN(fake_type_id_,
+ schema_store_->GetSchemaTypeId(kFakeType));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ const JoinablePropertyMetadata* metadata1,
+ schema_store_->GetJoinablePropertyMetadata(
+ fake_type_id_, std::string(kPropertyQualifiedId)));
+ ASSERT_THAT(metadata1, NotNull());
+ fake_type_joinable_property_id_ = metadata1->id;
+
+ // Get NestedType related ids.
+ ICING_ASSERT_OK_AND_ASSIGN(nested_type_id_,
+ schema_store_->GetSchemaTypeId(kNestedType));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ const JoinablePropertyMetadata* metadata2,
+ schema_store_->GetJoinablePropertyMetadata(
+ nested_type_id_,
+ absl_ports::StrCat(kPropertyNestedDoc, ".", kPropertyQualifiedId)));
+ ASSERT_THAT(metadata2, NotNull());
+ nested_type_nested_joinable_property_id_ = metadata2->id;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ const JoinablePropertyMetadata* metadata3,
+ schema_store_->GetJoinablePropertyMetadata(
+ nested_type_id_, std::string(kPropertyQualifiedId2)));
+ ASSERT_THAT(metadata3, NotNull());
+ nested_type_joinable_property_id_ = metadata3->id;
+ }
+
+ void TearDown() override {
+ doc_store_.reset();
+ schema_store_.reset();
+ lang_segmenter_.reset();
+ qualified_id_join_index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string base_dir_;
+ std::string qualified_id_join_index_dir_;
+ std::string schema_store_dir_;
+ std::string doc_store_dir_;
+
+ std::unique_ptr<QualifiedIdJoinIndexImplV2> qualified_id_join_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> doc_store_;
+
+ // FakeType related ids.
+ SchemaTypeId fake_type_id_;
+ JoinablePropertyId fake_type_joinable_property_id_;
+
+ // NestedType related ids.
+ SchemaTypeId nested_type_id_;
+ JoinablePropertyId nested_type_nested_joinable_property_id_;
+ JoinablePropertyId nested_type_joinable_property_id_;
+};
+
+libtextclassifier3::StatusOr<
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
+GetJoinData(const QualifiedIdJoinIndexImplV2& index,
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
+ index.GetIterator(schema_type_id, joinable_property_id));
+
+ std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
+ while (iter->Advance().ok()) {
+ result.push_back(iter->GetCurrent());
+ }
+
+ return result;
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ /*clock=*/nullptr, doc_store_.get(), qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, /*doc_store=*/nullptr, qualified_id_join_index_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ EXPECT_THAT(
+ QualifiedIdJoinIndexingHandler::Create(
+ &fake_clock_, doc_store_.get(), /*qualified_id_join_index=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ // Handle document.
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Verify the state of qualified_id_join_index_ after Handle().
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain
+ // [(doc_id, ref_doc_ns_fingerprint_id)].
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id))));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
+ // Create and put referenced (parent) document1. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id1,
+ doc_store_->Put(referenced_document1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id1,
+ doc_store_->GetNamespaceId(referenced_document1.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id1(
+ /*namespace_id=*/ref_doc_ns_id1,
+ /*target_str=*/referenced_document1.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id1),
+ IsOkAndHolds(ref_doc_id1));
+
+ // Create and put referenced (parent) document2. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/2")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "two")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id2,
+ doc_store_->Put(referenced_document2));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id2,
+ doc_store_->GetNamespaceId(referenced_document2.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id2(
+ /*namespace_id=*/ref_doc_ns_id2,
+ /*target_str=*/referenced_document2.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id2),
+ IsOkAndHolds(ref_doc_id2));
+
+ // Create and put (child) document:
+ // - kPropertyNestedDoc.kPropertyQualifiedId refers to referenced_document2.
+ // - kPropertyQualifiedId2 refers to referenced_document1.
+ //
+ // Also tokenize it.
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_type/1")
+ .SetSchema(std::string(kNestedType))
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDoc),
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "nested_fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/2")
+ .Build())
+ .AddStringProperty(std::string(kPropertyQualifiedId2),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
+ doc_store_->Put(nested_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ nested_document));
+
+ // Handle nested_document.
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Verify the state of qualified_id_join_index_ after Handle().
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+ // (kNestedType, kPropertyNestedDoc.kPropertyQualifiedId) should contain
+ // [(doc_id, ref_doc_ns_fingerprint_id2)].
+ EXPECT_THAT(
+ GetJoinData(
+ *qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
+ /*joinable_property_id=*/nested_type_nested_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id2))));
+ // (kNestedType, kPropertyQualifiedId2) should contain
+ // [(doc_id, ref_doc_ns_fingerprint_id1)].
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
+ /*joinable_property_id=*/nested_type_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id1))));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleShouldSkipInvalidFormatQualifiedId) {
+ static constexpr std::string_view kInvalidFormatQualifiedId =
+ "invalid_format_qualified_id";
+ ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // Create and put a (child) document whose referenced qualified id has an
+  // invalid format. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ std::string(kInvalidFormatQualifiedId))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ // Handle document. Should ignore invalid format qualified id.
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Verify the state of qualified_id_join_index_ after Handle(). Index data
+ // should remain unchanged since there is no valid qualified id, but
+ // last_added_document_id should be updated.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleShouldSkipNonExistingNamespace) {
+ static constexpr std::string_view kUnknownNamespace = "UnknownNamespace";
+  // Create and put a (child) document that references a parent qualified id
+  // with an unknown namespace. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(
+ std::string(kPropertyQualifiedId),
+ absl_ports::StrCat(kUnknownNamespace, "#", "ref_type/1"))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ // Handle document.
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Verify the state of qualified_id_join_index_ after Handle().
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should be empty since
+ // "UnknownNamespace#ref_type/1" should be skipped.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
+ // Create and put (child) document without any qualified id. Also tokenize it.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+
+ // Handle document.
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+
+ // Verify the state of qualified_id_join_index_ after Handle(). Index data
+ // should remain unchanged since there is no qualified id, but
+ // last_added_document_id should be updated.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK(doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ qualified_id_join_index_->set_last_added_document_id(ref_doc_id);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(ref_doc_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handling document with kInvalidDocumentId should cause a failure.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+ /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify the state of qualified_id_join_index_ after Handle(). Both index
+ // data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(ref_doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+
+ // Recovery mode should get the same result.
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, kInvalidDocumentId,
+                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(ref_doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handling document with document_id == last_added_document_id should cause a
+ // failure.
+ qualified_id_join_index_->set_last_added_document_id(doc_id);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify the state of qualified_id_join_index_ after Handle(). Both index
+ // data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+
+ // Handling document with document_id < last_added_document_id should cause a
+ // failure.
+ qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id + 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+ /*put_document_stats=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Verify the state of qualified_id_join_index_ after Handle(). Both index
+ // data and last_added_document_id should remain unchanged.
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id + 1));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleRecoveryModeShouldIndexDocsGtLastAddedDocId) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handle document with document_id > last_added_document_id in recovery mode.
+ // The handler should index this document and update last_added_document_id.
+ qualified_id_join_index_->set_last_added_document_id(doc_id - 1);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id - 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(
+ ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+ /*document_id=*/doc_id,
+ /*join_info=*/ref_doc_ns_fingerprint_id))));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+ HandleRecoveryModeShouldIgnoreDocsLeLastAddedDocId) {
+ // Create and put referenced (parent) document. Get its document id and
+ // namespace id.
+ DocumentProto referenced_document =
+ DocumentBuilder()
+ .SetKey("pkg$db/ns", "ref_type/1")
+ .SetSchema(std::string(kReferencedType))
+ .AddStringProperty(std::string(kPropertyName), "one")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+ doc_store_->Put(referenced_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId ref_doc_ns_id,
+ doc_store_->GetNamespaceId(referenced_document.namespace_()));
+ NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+ /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+ ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+ IsOkAndHolds(ref_doc_id));
+
+ // Create and put (child) document. Also tokenize it.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPropertyQualifiedId),
+ "pkg$db/ns#ref_type/1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ std::move(document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+ qualified_id_join_index_.get()));
+
+ // Handle document with document_id == last_added_document_id in recovery
+ // mode. We should not get any error, but the handler should ignore the
+ // document, so both index data and last_added_document_id should remain
+ // unchanged.
+ qualified_id_join_index_->set_last_added_document_id(doc_id);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+
+ // Handle document with document_id < last_added_document_id in recovery mode.
+ // We should not get any error, but the handler should ignore the document, so
+ // both index data and last_added_document_id should remain unchanged.
+ qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
+ ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id + 1));
+ EXPECT_THAT(
+ handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+ /*put_document_stats=*/nullptr),
+ IsOk());
+ EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+ Eq(doc_id + 1));
+ // (kFakeType, kPropertyQualifiedId) should contain nothing.
+ EXPECT_THAT(
+ GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+ /*joinable_property_id=*/fake_type_joinable_property_id_),
+ IsOkAndHolds(IsEmpty()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id.cc b/icing/join/qualified-id.cc
new file mode 100644
index 0000000..42e080c
--- /dev/null
+++ b/icing/join/qualified-id.cc
@@ -0,0 +1,110 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id.h"
+
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Since '#' is the separator and '\' escapes '\' and '#', only these two
+// characters are treated as special when parsing a qualified id.
+bool IsSpecialCharacter(char c) {
+ return c == QualifiedId::kEscapeChar ||
+ c == QualifiedId::kNamespaceUriSeparator;
+}
+
+// Helper function to verify the format (check the escape sequences and make
+// sure there is exactly one separator '#') and find the position of that
+// separator.
+//
+// Returns:
+// A valid index of the separator on success.
+// std::string::npos if the escape format of content is incorrect.
+//   std::string::npos if the content contains zero or more than one separator.
+// std::string::npos if the content contains '\0'.
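+//
+// For example, for R"(a\#b#c)" the escaped '#' at index 2 is skipped and the
+// separator position 4 is returned.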
+size_t VerifyFormatAndGetSeparatorPosition(std::string_view content) {
+ size_t separator_pos = std::string::npos;
+ for (size_t i = 0; i < content.length(); ++i) {
+ if (content[i] == '\0') {
+ return std::string::npos;
+ }
+
+ if (content[i] == QualifiedId::kEscapeChar) {
+ // Advance to the next character.
+ ++i;
+ if (i >= content.length() || !IsSpecialCharacter(content[i])) {
+ // Invalid escape format.
+ return std::string::npos;
+ }
+ } else if (content[i] == QualifiedId::kNamespaceUriSeparator) {
+ if (separator_pos != std::string::npos) {
+ // Found another separator, so return std::string::npos since only one
+ // separator is allowed.
+ return std::string::npos;
+ }
+ separator_pos = i;
+ }
+ }
+ return separator_pos;
+}
+
+// Helper function to unescape the content.
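+// For example, R"(ns\#1)" unescapes to "ns#1", and R"(a\\b)" to "a\b".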
+libtextclassifier3::StatusOr<std::string> Unescape(std::string_view content) {
+ std::string unescaped_content;
+ for (size_t i = 0; i < content.length(); ++i) {
+ if (content[i] == QualifiedId::kEscapeChar) {
+ // Advance to the next character.
+ ++i;
+ if (i >= content.length() || !IsSpecialCharacter(content[i])) {
+ // Invalid escape format.
+ return absl_ports::InvalidArgumentError("Invalid escape format");
+ }
+ }
+ unescaped_content += content[i];
+ }
+ return unescaped_content;
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<QualifiedId> QualifiedId::Parse(
+ std::string_view qualified_id_str) {
+ size_t separator_pos = VerifyFormatAndGetSeparatorPosition(qualified_id_str);
+ if (separator_pos == std::string::npos) {
+ return absl_ports::InvalidArgumentError(
+ "Failed to find the position of separator");
+ }
+
+ if (separator_pos == 0 || separator_pos + 1 >= qualified_id_str.length()) {
+ return absl_ports::InvalidArgumentError(
+ "Namespace or uri cannot be empty after parsing");
+ }
+
+ ICING_ASSIGN_OR_RETURN(std::string name_space,
+ Unescape(qualified_id_str.substr(0, separator_pos)));
+ ICING_ASSIGN_OR_RETURN(std::string uri,
+ Unescape(qualified_id_str.substr(separator_pos + 1)));
+ return QualifiedId(std::move(name_space), std::move(uri));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/qualified-id.h b/icing/join/qualified-id.h
new file mode 100644
index 0000000..eb6606a
--- /dev/null
+++ b/icing/join/qualified-id.h
@@ -0,0 +1,65 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_H_
+#define ICING_JOIN_QUALIFIED_ID_H_
+
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedId definition: namespace and uri.
+// This is a wrapper class for parsing qualified id string.
+//
+// Qualified id string format: escape(namespace) + '#' + escape(uri).
+// - Use '#' as the separator to concatenate namespace and uri.
+// - Use '\' to escape '\' and '#' in namespace and uri.
+// - There must be exactly one unescaped separator '#' in a qualified id
+//   string, and the remaining content must be correctly escaped.
+// - Raw namespace and uri cannot be empty.
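+//
+// For example, "pkg$db/ns#ref_type/1" parses to namespace "pkg$db/ns" and uri
+// "ref_type/1", and R"(namespace\\#uri)" parses to namespace R"(namespace\)"
+// and uri "uri".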
+class QualifiedId {
+ public:
+ static constexpr char kEscapeChar = '\\';
+ static constexpr char kNamespaceUriSeparator = '#';
+
+ // Parses a qualified id string "<escaped(namespace)>#<escaped(uri)>" and
+ // creates an instance of QualifiedId.
+ //
+ // qualified_id_str: a qualified id string having the format mentioned above.
+ //
+ // Returns:
+ // - A QualifiedId instance with raw namespace and uri, on success.
+ // - INVALID_ARGUMENT_ERROR if the format of qualified_id_str is incorrect.
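+  //
+  // Usage sketch:
+  //   auto id_or = QualifiedId::Parse(R"(pkg$db/ns#ref_type/1)");
+  //   if (!id_or.ok()) { /* handle INVALID_ARGUMENT_ERROR */ }
+  //   QualifiedId id = std::move(id_or).ValueOrDie();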
+ static libtextclassifier3::StatusOr<QualifiedId> Parse(
+ std::string_view qualified_id_str);
+
+ explicit QualifiedId(std::string name_space, std::string uri)
+ : name_space_(std::move(name_space)), uri_(std::move(uri)) {}
+
+ const std::string& name_space() const { return name_space_; }
+ const std::string& uri() const { return uri_; }
+
+ private:
+ std::string name_space_;
+ std::string uri_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_QUALIFIED_ID_H_
diff --git a/icing/join/qualified-id_test.cc b/icing/join/qualified-id_test.cc
new file mode 100644
index 0000000..92bf63e
--- /dev/null
+++ b/icing/join/qualified-id_test.cc
@@ -0,0 +1,159 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id.h"
+
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(QualifiedIdTest, ValidQualifiedIdWithoutSpecialCharacters) {
+ // "namespace#uri" -> "namespace" + "uri"
+ ICING_ASSERT_OK_AND_ASSIGN(QualifiedId id,
+ QualifiedId::Parse(R"(namespace#uri)"));
+ EXPECT_THAT(id.name_space(), Eq(R"(namespace)"));
+ EXPECT_THAT(id.uri(), R"(uri)");
+}
+
+TEST(QualifiedIdTest, ValidQualifiedIdWithEscapedSpecialCharacters) {
+ // "namespace\\#uri" -> "namespace\" + "uri"
+ ICING_ASSERT_OK_AND_ASSIGN(QualifiedId id1,
+ QualifiedId::Parse(R"(namespace\\#uri)"));
+ EXPECT_THAT(id1.name_space(), Eq(R"(namespace\)"));
+ EXPECT_THAT(id1.uri(), R"(uri)");
+
+ // "namespace\\\##uri" -> "namespace\#" + "uri"
+ ICING_ASSERT_OK_AND_ASSIGN(QualifiedId id2,
+ QualifiedId::Parse(R"(namespace\\\##uri)"));
+ EXPECT_THAT(id2.name_space(), Eq(R"(namespace\#)"));
+ EXPECT_THAT(id2.uri(), R"(uri)");
+
+ // "namespace#\#\\uri" -> "namespace" + "#\uri"
+ ICING_ASSERT_OK_AND_ASSIGN(QualifiedId id3,
+ QualifiedId::Parse(R"(namespace#\#\\uri)"));
+ EXPECT_THAT(id3.name_space(), Eq(R"(namespace)"));
+ EXPECT_THAT(id3.uri(), R"(#\uri)");
+
+ // "namespace\\\##\#\\uri" -> "namespace\#" + "#\uri"
+ ICING_ASSERT_OK_AND_ASSIGN(QualifiedId id4,
+ QualifiedId::Parse(R"(namespace\\\##\#\\uri)"));
+ EXPECT_THAT(id4.name_space(), Eq(R"(namespace\#)"));
+ EXPECT_THAT(id4.uri(), R"(#\uri)");
+}
+
+TEST(QualifiedIdTest, InvalidQualifiedIdWithEmptyNamespaceOrUri) {
+ // "#uri"
+ EXPECT_THAT(QualifiedId::Parse(R"(#uri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace#"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespace#)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "#"
+ EXPECT_THAT(QualifiedId::Parse(R"(#)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(QualifiedIdTest, InvalidQualifiedIdWithInvalidEscape) {
+ // "namespace\"
+  // Append an additional '#' and use a string_view trick to exercise the
+  // bounds check when skipping the trailing '\'.
+ std::string str1 = R"(namespace\)"
+ R"(#)";
+ EXPECT_THAT(
+ QualifiedId::Parse(std::string_view(str1.data(), str1.length() - 1)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "names\pace#uri"
+ EXPECT_THAT(QualifiedId::Parse(R"(names\pace#uri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "names\\\pace#uri"
+ EXPECT_THAT(QualifiedId::Parse(R"(names\\\pace#uri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace#uri\"
+  // Append an additional '#' and use a string_view trick to exercise the
+  // bounds check when skipping the trailing '\'.
+ std::string str2 = R"(namespace#uri\)"
+ R"(#)";
+ EXPECT_THAT(
+ QualifiedId::Parse(std::string_view(str2.data(), str2.length() - 1)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(QualifiedIdTest, InvalidQualifiedIdWithWrongNumberOfSeparators) {
+ // ""
+ EXPECT_THAT(QualifiedId::Parse(R"()"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespaceuri"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespaceuri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace##uri"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespace##uri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace#uri#others"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespace#uri#others)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace\#uri"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespace\#uri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace\\##uri"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespace\\##uri)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // "namespace#uri\\#others"
+ EXPECT_THAT(QualifiedId::Parse(R"(namespace#uri\\#)"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(QualifiedIdTest, InvalidQualifiedIdWithStringTerminator) {
+ const char invalid_qualified_id1[] = "names\0pace#uri";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id1, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id2[] = "namespace#ur\0i";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id2, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id3[] = "\0namespace#uri";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id3, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ const char invalid_qualified_id4[] = "namespace#uri\0";
+ EXPECT_THAT(QualifiedId::Parse(std::string_view(invalid_qualified_id4, 14)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/core/icing-core-types.h b/icing/legacy/core/icing-core-types.h
index cc12663..7db8408 100644
--- a/icing/legacy/core/icing-core-types.h
+++ b/icing/legacy/core/icing-core-types.h
@@ -21,9 +21,8 @@
#ifndef ICING_LEGACY_CORE_ICING_CORE_TYPES_H_
#define ICING_LEGACY_CORE_ICING_CORE_TYPES_H_
-#include <stdint.h>
-
#include <cstddef> // size_t not defined implicitly for all platforms.
+#include <cstdint>
#include <vector>
#include "icing/legacy/core/icing-compat.h"
diff --git a/icing/legacy/core/icing-string-util.cc b/icing/legacy/core/icing-string-util.cc
index 1954cd3..ed06e03 100644
--- a/icing/legacy/core/icing-string-util.cc
+++ b/icing/legacy/core/icing-string-util.cc
@@ -11,21 +11,13 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-
-// Copyright 2011 Google Inc. All Rights Reserved.
-// Author: ulas@google.com (Ulas Kirazci)
-// sbanacho@google.com (Scott Banachowski)
-//
-// This is a list of IsGoogleLetter letters. It is copied from
-// google3/util/utf8/proptables/letters.txt CL 19164202.
#include "icing/legacy/core/icing-string-util.h"
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-
#include <algorithm>
+#include <cstdarg>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
#include <string>
#include "icing/legacy/portable/icing-zlib.h"
@@ -34,7 +26,6 @@
namespace icing {
namespace lib {
-namespace {} // namespace
uint32_t IcingStringUtil::UpdateCrc32(uint32_t crc, const char *str, int len) {
if (len > 0) {
crc = ~crc32(~crc, reinterpret_cast<const Bytef *>(str), len);
diff --git a/icing/legacy/core/icing-string-util.h b/icing/legacy/core/icing-string-util.h
index 4ea93ec..e5e4941 100644
--- a/icing/legacy/core/icing-string-util.h
+++ b/icing/legacy/core/icing-string-util.h
@@ -12,16 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Copyright 2011 Google Inc. All Rights Reserved.
-// Author: ulas@google.com (Ulas Kirazci)
-// sbanacho@google.com (Scott Banachowski)
-
#ifndef ICING_LEGACY_CORE_ICING_STRING_UTIL_H_
#define ICING_LEGACY_CORE_ICING_STRING_UTIL_H_
-#include <stdarg.h>
-#include <stdint.h>
-
+#include <cstdarg>
+#include <cstdint>
#include <string>
#include "icing/legacy/core/icing-compat.h"
diff --git a/icing/legacy/core/icing-timer.h b/icing/legacy/core/icing-timer.h
index 49ba9ad..af38912 100644
--- a/icing/legacy/core/icing-timer.h
+++ b/icing/legacy/core/icing-timer.h
@@ -16,7 +16,8 @@
#define ICING_LEGACY_CORE_ICING_TIMER_H_
#include <sys/time.h>
-#include <time.h>
+
+#include <ctime>
namespace icing {
namespace lib {
diff --git a/icing/legacy/index/icing-array-storage.cc b/icing/legacy/index/icing-array-storage.cc
index b462135..de5178a 100644
--- a/icing/legacy/index/icing-array-storage.cc
+++ b/icing/legacy/index/icing-array-storage.cc
@@ -14,10 +14,10 @@
#include "icing/legacy/index/icing-array-storage.h"
-#include <inttypes.h>
#include <sys/mman.h>
#include <algorithm>
+#include <cinttypes>
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/core/icing-timer.h"
@@ -65,17 +65,13 @@ bool IcingArrayStorage::Init(int fd, size_t fd_offset, bool map_shared,
return false;
}
if (file_size < fd_offset) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Array storage file size %" PRIu64 " less than offset %zu", file_size,
- fd_offset);
+ ICING_LOG(ERROR) << "Array storage file size " << file_size << " less than offset " << fd_offset;
return false;
}
uint32_t capacity_num_elts = (file_size - fd_offset) / elt_size;
if (capacity_num_elts < num_elts) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Array storage num elts %u > capacity num elts %u", num_elts,
- capacity_num_elts);
+ ICING_LOG(ERROR) << "Array storage num elts " << num_elts << " > capacity num elts " << capacity_num_elts;
return false;
}
@@ -108,8 +104,7 @@ bool IcingArrayStorage::Init(int fd, size_t fd_offset, bool map_shared,
if (init_crc) {
*crc_ptr_ = crc;
} else if (crc != *crc_ptr_) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Array storage bad crc %u vs %u", crc, *crc_ptr_);
+ ICING_LOG(ERROR) << "Array storage bad crc " << crc << " vs " << *crc_ptr_;
goto failed;
}
}
@@ -276,9 +271,9 @@ void IcingArrayStorage::UpdateCrc() {
cur_offset += change.elt_len * elt_size_;
}
if (!changes_.empty()) {
- ICING_VLOG(2) << IcingStringUtil::StringPrintf(
- "Array update partial crcs %d truncated %d overlapped %d duplicate %d",
- num_partial_crcs, num_truncated, num_overlapped, num_duplicate);
+ ICING_VLOG(2) << "Array update partial crcs " << num_partial_crcs
+ << " truncated " << num_truncated << " overlapped " << num_overlapped
+ << " duplicate " << num_duplicate;
}
// Now update with grown area.
@@ -286,8 +281,7 @@ void IcingArrayStorage::UpdateCrc() {
cur_crc = IcingStringUtil::UpdateCrc32(
cur_crc, array_cast<char>() + changes_end_ * elt_size_,
(cur_num_ - changes_end_) * elt_size_);
- ICING_VLOG(2) << IcingStringUtil::StringPrintf(
- "Array update tail crc offset %u -> %u", changes_end_, cur_num_);
+ ICING_VLOG(2) << "Array update tail crc offset " << changes_end_ << " -> " << cur_num_;
}
// Clear, now that we've applied changes.
@@ -341,8 +335,7 @@ uint32_t IcingArrayStorage::Sync() {
if (pwrite(fd_, array() + dirty_start, dirty_end - dirty_start,
fd_offset_ + dirty_start) !=
static_cast<ssize_t>(dirty_end - dirty_start)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flushing pages failed (%u, %u)", dirty_start, dirty_end);
+ ICING_LOG(ERROR) << "Flushing pages failed (" << dirty_start << ", " << dirty_end << ")";
}
in_dirty = false;
} else if (!in_dirty && is_dirty) {
@@ -361,8 +354,7 @@ uint32_t IcingArrayStorage::Sync() {
if (pwrite(fd_, array() + dirty_start, dirty_end - dirty_start,
fd_offset_ + dirty_start) !=
static_cast<ssize_t>(dirty_end - dirty_start)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flushing pages failed (%u, %u)", dirty_start, dirty_end);
+ ICING_LOG(ERROR) << "Flushing pages failed (" << dirty_start << ", " << dirty_end << ")";
}
}
@@ -377,9 +369,7 @@ uint32_t IcingArrayStorage::Sync() {
}
if (num_flushed > 0) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Flushing %u/%u %u contiguous pages in %.3fms", num_flushed,
- dirty_pages_size, num_contiguous, timer.Elapsed() * 1000.);
+ ICING_VLOG(1) << "Flushing " << num_flushed << "/" << dirty_pages_size << " " << num_contiguous << " contiguous pages in " << timer.Elapsed() * 1000 << "ms.";
}
return num_flushed;
diff --git a/icing/legacy/index/icing-array-storage.h b/icing/legacy/index/icing-array-storage.h
index fad0565..0d93172 100644
--- a/icing/legacy/index/icing-array-storage.h
+++ b/icing/legacy/index/icing-array-storage.h
@@ -20,8 +20,7 @@
#ifndef ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
#define ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
-#include <stdint.h>
-
+#include <cstdint>
#include <string>
#include <vector>
diff --git a/icing/legacy/index/icing-bit-util.h b/icing/legacy/index/icing-bit-util.h
index 3273a68..d0c3f50 100644
--- a/icing/legacy/index/icing-bit-util.h
+++ b/icing/legacy/index/icing-bit-util.h
@@ -20,9 +20,8 @@
#ifndef ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_
#define ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_
-#include <stdint.h>
-#include <stdio.h>
-
+#include <cstdint>
+#include <cstdio>
#include <limits>
#include <vector>
diff --git a/icing/legacy/index/icing-common-types.h b/icing/legacy/index/icing-common-types.h
deleted file mode 100644
index 592b549..0000000
--- a/icing/legacy/index/icing-common-types.h
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Copyright 2014 Google Inc. All Rights Reserved.
-// Author: sbanacho@google.com (Scott Banachowski)
-// Author: csyoung@google.com (C. Sean Young)
-
-#ifndef ICING_LEGACY_INDEX_ICING_COMMON_TYPES_H_
-#define ICING_LEGACY_INDEX_ICING_COMMON_TYPES_H_
-
-#include "icing/legacy/core/icing-core-types.h"
-
-// Protocol buffers are shared across several components.
-namespace com {
-namespace google {
-namespace android {
-namespace gms {
-namespace icing {
-namespace lib {
-
-class ClientFileGroup;
-class Document;
-class Document_Section;
-class DocumentStoreStatusProto;
-class IMEUpdate;
-class IMEUpdateResponse;
-class IndexCorpusScoringConfig;
-class IndexCorpusScoringConfig_Section;
-class IndexScoringConfig;
-class InitStatus;
-class InitStatus_CorpusInitInfo;
-class PendingDeleteUsageReport;
-class PhraseAffinityRequest;
-class QueryResponse;
-class QueryResponse_Corpus;
-class QueryResponse_Corpus_Section;
-class QueryResponse_Corpus_Tag;
-class QueryRequestSpec;
-class QueryRequestSpec_CorpusSpec;
-class QueryRequestSpec_SectionSpec;
-class ResponseDebugInfo;
-class ResultDebugInfo;
-class SectionConfig;
-class SuggestionResponse;
-class SuggestionResponse_Suggestion;
-class UsageReportsResponse;
-class UsageStats;
-class UsageStats_Corpus;
-
-} // namespace lib
-} // namespace icing
-} // namespace gms
-} // namespace android
-} // namespace google
-} // namespace com
-
-namespace icing {
-namespace lib {
-
-// Typedefs.
-using IcingDocId = uint32_t;
-
-using IcingSectionId = uint32_t;
-
-using IcingCorpusId = uint16_t;
-using IcingSectionIdMask = uint16_t;
-
-using IcingTagsCount = uint16_t;
-
-using IcingSequenceNumber = int64_t;
-
-using IcingScore = uint64_t;
-
-constexpr size_t kIcingMaxTokenLen = 30; // default shared between query
- // processor and indexer
-constexpr int kIcingQueryTermLimit = 50; // Maximum number of terms in a query
-constexpr int kIcingMaxVariantsPerToken = 10; // Maximum number of variants
-
-// LINT.IfChange
-constexpr int kIcingDocIdBits = 20; // 1M docs
-constexpr IcingDocId kIcingInvalidDocId = (1u << kIcingDocIdBits) - 1;
-constexpr IcingDocId kIcingMaxDocId = kIcingInvalidDocId - 1;
-// LINT.ThenChange(//depot/google3/wireless/android/icing/plx/google_sql_common_macros.sql)
-
-constexpr int kIcingDocScoreBits = 32;
-
-constexpr int kIcingSectionIdBits = 4; // 4 bits for 16 values
-constexpr IcingSectionId kIcingMaxSectionId = (1u << kIcingSectionIdBits) - 1;
-constexpr IcingSectionId kIcingInvalidSectionId = kIcingMaxSectionId + 1;
-constexpr IcingSectionIdMask kIcingSectionIdMaskAll = ~IcingSectionIdMask{0};
-constexpr IcingSectionIdMask kIcingSectionIdMaskNone = IcingSectionIdMask{0};
-
-constexpr int kIcingCorpusIdBits = 15; // 32K
-constexpr IcingCorpusId kIcingInvalidCorpusId = (1u << kIcingCorpusIdBits) - 1;
-constexpr IcingCorpusId kIcingMaxCorpusId = kIcingInvalidCorpusId - 1;
-
-constexpr size_t kIcingMaxSearchableDocumentSize = (1u << 16) - 1; // 64K
-// Max num tokens per document. 64KB is our original maximum (searchable)
-// document size. We clip if document exceeds this.
-constexpr uint32_t kIcingMaxNumTokensPerDoc =
- kIcingMaxSearchableDocumentSize / 5;
-constexpr uint32_t kIcingMaxNumHitsPerDocument =
- kIcingMaxNumTokensPerDoc * kIcingMaxVariantsPerToken;
-
-constexpr IcingTagsCount kIcingInvalidTagCount = ~IcingTagsCount{0};
-constexpr IcingTagsCount kIcingMaxTagCount = kIcingInvalidTagCount - 1;
-
-// Location refers to document storage.
-constexpr uint64_t kIcingInvalidLocation = ~uint64_t{0};
-constexpr uint64_t kIcingMaxDocStoreWriteLocation = uint64_t{1}
- << 32; // 4bytes.
-
-// Dump symbols in the proto namespace.
-using namespace ::com::google::android::gms::icing; // NOLINT(build/namespaces)
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_LEGACY_INDEX_ICING_COMMON_TYPES_H_
diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc
index ee3d3a2..378b666 100644
--- a/icing/legacy/index/icing-dynamic-trie.cc
+++ b/icing/legacy/index/icing-dynamic-trie.cc
@@ -62,18 +62,20 @@
#include "icing/legacy/index/icing-dynamic-trie.h"
-#include <errno.h>
#include <fcntl.h>
-#include <inttypes.h>
-#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
+#include <cerrno>
+#include <cinttypes>
+#include <cstdint>
+#include <cstring>
#include <memory>
#include <utility>
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/legacy/core/icing-packed-pod.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/core/icing-timer.h"
@@ -81,9 +83,11 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-flash-bitmap.h"
#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/legacy/index/proto/icing-dynamic-trie-header.pb.h"
#include "icing/util/i18n-utils.h"
#include "icing/util/logging.h"
#include "icing/util/math-util.h"
+#include "icing/util/status-macros.h"
using std::inplace_merge;
using std::lower_bound;
@@ -96,14 +100,22 @@ using std::vector;
namespace icing {
namespace lib {
+namespace {
+constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1;
+constexpr uint32_t kInvalidNextIndex = ~0U;
+
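+// Resets a Next entry to its sentinel state: val 0xff and kInvalidNodeIndex.
+// Valid entries compare less than this sentinel, so reset entries sort to the
+// end of a next array, which is what GetValidNextsSize relies on when
+// computing the number of valid entries.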
+void ResetMutableNext(IcingDynamicTrie::Next &mutable_next) {
+ mutable_next.set_val(0xff);
+ mutable_next.set_node_index(kInvalidNodeIndex);
+}
+} // namespace
+
// Based on the bit field widths.
const uint32_t IcingDynamicTrie::Options::kMaxNodes = (1U << 24) - 1;
const uint32_t IcingDynamicTrie::Options::kMaxNexts = (1U << 27) - 1;
const uint32_t IcingDynamicTrie::Options::kMaxSuffixesSize = 1U << 27;
const uint32_t IcingDynamicTrie::Options::kMaxValueSize = 1U << 16;
-const uint32_t IcingDynamicTrie::kInvalidNodeIndex = (1U << 24) - 1;
-const uint32_t IcingDynamicTrie::kInvalidNextIndex = ~0U;
const uint32_t IcingDynamicTrie::kInvalidSuffixIndex = ~0U;
const int IcingDynamicTrie::kMaxNextArraySize;
@@ -298,7 +310,7 @@ class IcingDynamicTrie::IcingDynamicTrieStorage {
// REQUIRES: nodes_left() > 0.
Node *AllocNode();
// REQUIRES: nexts_left() >= kMaxNextArraySize.
- Next *AllocNextArray(int size);
+ libtextclassifier3::StatusOr<Next *> AllocNextArray(int size);
void FreeNextArray(Next *next, int log2_size);
// REQUIRES: suffixes_left() >= strlen(suffix) + 1 + value_size()
uint32_t MakeSuffix(const char *suffix, const void *value,
@@ -383,6 +395,8 @@ class IcingDynamicTrie::IcingDynamicTrieStorage {
// storage.
IcingScopedFd array_fds_[NUM_ARRAY_TYPES];
std::vector<IcingArrayStorage> array_storage_;
+
+  // Legacy file system; this should eventually be switched to the new
+  // Filesystem class.
const IcingFilesystem *filesystem_;
};
@@ -449,8 +463,7 @@ bool IcingDynamicTrie::IcingDynamicTrieStorage::Init() {
if (i == 0) {
// Header.
if (file_size != IcingMMapper::system_page_size()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Trie hdr wrong size: %" PRIu64, file_size);
+ ICING_LOG(ERROR) << "Trie hdr wrong size: " << file_size;
goto failed;
}
@@ -511,8 +524,7 @@ bool IcingDynamicTrie::IcingDynamicTrieStorage::Init() {
sizeof(char), hdr_.hdr.suffixes_size(),
hdr_.hdr.max_suffixes_size(),
&crcs_->array_crcs[SUFFIX], init_crcs)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Trie mmap suffix failed");
+ ICING_LOG(ERROR) << "Trie mmap suffix failed";
goto failed;
}
@@ -660,8 +672,7 @@ bool IcingDynamicTrie::IcingDynamicTrieStorage::Sync() {
}
if (!WriteHeader()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flushing trie header failed: %s", strerror(errno));
+ ICING_LOG(ERROR) << "Flushing trie header failed: " << strerror(errno);
success = false;
}
@@ -675,8 +686,7 @@ bool IcingDynamicTrie::IcingDynamicTrieStorage::Sync() {
}
if (total_flushed > 0) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf("Flushing %u pages of trie",
- total_flushed);
+ ICING_VLOG(1) << "Flushing " << total_flushed << " pages of trie";
}
return success;
@@ -719,10 +729,11 @@ IcingDynamicTrie::Node *IcingDynamicTrie::IcingDynamicTrieStorage::AllocNode() {
return GetMutableNode(hdr_.hdr.num_nodes() - 1);
}
-IcingDynamicTrie::Next *
+libtextclassifier3::StatusOr<IcingDynamicTrie::Next *>
IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
if (size > kMaxNextArraySize) {
- ICING_LOG(FATAL) << "Array size exceeds the max 'next' array size";
+ return absl_ports::InternalError(
+ "Array size exceeds the max 'next' array size");
}
if (nexts_left() < static_cast<uint32_t>(kMaxNextArraySize)) {
@@ -752,8 +763,7 @@ IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
// Fill with char 0xff so we are sorted properly.
for (int i = 0; i < aligned_size; i++) {
- ret[i].set_val(0xff);
- ret[i].set_node_index(kInvalidNodeIndex);
+ ResetMutableNext(ret[i]);
}
return ret;
}
@@ -807,8 +817,7 @@ uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrc() {
uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrcInternal(
bool write_hdr) {
if (write_hdr && !WriteHeader()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flushing trie header failed: %s", strerror(errno));
+ ICING_LOG(ERROR) << "Flushing trie header failed: " << strerror(errno);
}
crcs_->header_crc = GetHeaderCrc();
@@ -891,7 +900,7 @@ bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::SerializeToArray(
uint8_t *buf, uint32_t buf_size) const {
- uint32_t size = hdr.ByteSize();
+ uint32_t size = hdr.ByteSizeLong();
if (size + sizeof(kMagic) + sizeof(uint32_t) > buf_size) return false;
memcpy(buf, &kMagic, sizeof(kMagic));
memcpy(buf + sizeof(kMagic), &size, sizeof(uint32_t));
@@ -902,8 +911,7 @@ bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::SerializeToArray(
bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Verify() {
// Check version.
if (hdr.version() != kCurVersion) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Trie version %u mismatch", hdr.version());
+ ICING_LOG(ERROR) << "Trie version " << hdr.version() << " mismatch";
return false;
}
@@ -1145,9 +1153,8 @@ bool IcingDynamicTrie::Sync() {
Warm();
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Syncing dynamic trie %s took %.3fms", filename_base_.c_str(),
- timer.Elapsed() * 1000.);
+ ICING_VLOG(1) << "Syncing dynamic trie " << filename_base_.c_str()
+ << " took " << timer.Elapsed() * 1000 << "ms";
return success;
}
@@ -1197,8 +1204,7 @@ std::unique_ptr<IcingFlashBitmap> IcingDynamicTrie::OpenAndInitBitmap(
const IcingFilesystem *filesystem) {
auto bitmap = std::make_unique<IcingFlashBitmap>(filename, filesystem);
if (!bitmap->Init() || (verify && !bitmap->Verify())) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Init of %s failed",
- filename.c_str());
+ ICING_LOG(ERROR) << "Init of " << filename.c_str() << " failed";
return nullptr;
}
return bitmap;
@@ -1228,16 +1234,14 @@ bool IcingDynamicTrie::InitPropertyBitmaps() {
vector<std::string> files;
if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
&files)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
+ ICING_LOG(ERROR) << "Could not get files at prefix " << property_bitmaps_prefix_;
goto failed;
}
for (size_t i = 0; i < files.size(); i++) {
// Decode property id from filename.
size_t property_id_start_idx = files[i].rfind('.');
if (property_id_start_idx == std::string::npos) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
- files[i].c_str());
+ ICING_LOG(ERROR) << "Malformed filename " << files[i];
continue;
}
property_id_start_idx++; // skip dot
@@ -1245,8 +1249,7 @@ bool IcingDynamicTrie::InitPropertyBitmaps() {
uint32_t property_id =
strtol(files[i].c_str() + property_id_start_idx, &end, 10); // NOLINT
if (!end || end != (files[i].c_str() + files[i].size())) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
- files[i].c_str());
+ ICING_LOG(ERROR) << "Malformed filename " << files[i];
continue;
}
std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap(
@@ -1254,8 +1257,7 @@ bool IcingDynamicTrie::InitPropertyBitmaps() {
runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
filesystem_);
if (!bitmap) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Open prop bitmap failed: %s", files[i].c_str());
+ ICING_LOG(ERROR) << "Open prop bitmap failed: " << files[i];
goto failed;
}
bitmap->Truncate(truncate_idx);
@@ -1299,50 +1301,6 @@ void IcingDynamicTrie::OnSleep() {
UpdateCrc();
}
-IcingDynamicTrie::NewValueMap::~NewValueMap() {}
-
-bool IcingDynamicTrie::Compact(
- const NewValueMap &old_tvi_to_new_value, IcingDynamicTrie *out,
- std::unordered_map<uint32_t, uint32_t> *old_to_new_tvi) const {
- if (old_to_new_tvi == nullptr) {
- ICING_LOG(ERROR) << "TVI is null";
- }
-
- if (!is_initialized()) {
- ICING_LOG(FATAL) << "DynamicTrie not initialized";
- }
-
- PropertyReadersAll prop_readers(*this);
-
- old_to_new_tvi->clear();
- old_to_new_tvi->rehash(size() * 2);
-
- for (Iterator it_all(*this, ""); it_all.IsValid(); it_all.Advance()) {
- uint32_t value_index = it_all.GetValueIndex();
- const void *new_value = old_tvi_to_new_value.GetNewValue(value_index);
- if (!new_value) continue;
-
- uint32_t new_value_index;
- if (!out->Insert(it_all.GetKey(), new_value, &new_value_index, false)) {
- return false;
- }
-
- old_to_new_tvi->insert({value_index, new_value_index});
-
- // Copy properties.
- for (size_t i = 0; i < prop_readers.size(); i++) {
- if (prop_readers.HasProperty(i, value_index)) {
- if (!out->SetProperty(new_value_index, i)) {
- // Ouch. We need to bail.
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
uint32_t IcingDynamicTrie::size() const {
if (!is_initialized()) {
ICING_LOG(FATAL) << "DynamicTrie not initialized";
@@ -1350,10 +1308,12 @@ uint32_t IcingDynamicTrie::size() const {
return storage_->hdr().num_keys();
}
-void IcingDynamicTrie::CollectStatsRecursive(const Node &node,
- Stats *stats) const {
+void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats,
+ uint32_t depth) const {
if (node.is_leaf()) {
stats->num_leaves++;
+ stats->sum_depth += depth;
+ stats->max_depth = max(stats->max_depth, depth);
const char *suffix = storage_->GetSuffix(node.next_index());
stats->suffixes_used += strlen(suffix) + 1 + value_size();
if (!suffix[0]) {
@@ -1365,13 +1325,16 @@ void IcingDynamicTrie::CollectStatsRecursive(const Node &node,
for (; i < (1U << node.log2_num_children()); i++) {
const Next &next = *storage_->GetNext(node.next_index(), i);
if (next.node_index() == kInvalidNodeIndex) break;
- CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats);
+ CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats,
+ depth + 1);
}
// At least one valid node in each next array
if (i == 0) {
ICING_LOG(FATAL) << "No valid node in 'next' array";
}
+ stats->sum_children += i;
+ stats->max_children = max(stats->max_children, i);
stats->child_counts[i - 1]++;
stats->wasted[node.log2_num_children()] +=
@@ -1453,9 +1416,12 @@ std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const {
"Wasted total: %u\n"
"Num intermediates %u num leaves %u "
"suffixes used %u null %u\n"
+ "avg and max children for intermediates: %.3f, %u\n"
+ "avg and max depth for leaves: %.3f, %u\n"
"Total next frag: %.3f%%\n",
total_wasted, num_intermediates, num_leaves, suffixes_used,
- null_suffixes,
+ null_suffixes, 1. * sum_children / num_intermediates, max_children,
+ 1. * sum_depth / num_leaves, max_depth,
100. * math_util::SafeDivide((total_free + total_wasted), num_nexts));
}
IcingStringUtil::SStringAppendF(
@@ -1502,9 +1468,56 @@ void IcingDynamicTrie::Clear() {
deleted_bitmap_->Truncate(0);
}
-bool IcingDynamicTrie::Insert(const char *key, const void *value,
- uint32_t *value_index, bool replace,
- bool *pnew_key) {
+bool IcingDynamicTrie::ClearSuffixAndValue(uint32_t suffix_value_index) {
+ // The size 1 below is for a '\0' between the suffix and the value.
+ size_t suffix_and_value_length =
+ strlen(this->storage_->GetSuffix(suffix_value_index)) + 1 +
+ this->value_size();
+ char *mutable_suffix_and_value = this->storage_->GetMutableSuffix(
+ suffix_value_index, suffix_and_value_length);
+
+ if (mutable_suffix_and_value == nullptr) {
+ return false;
+ }
+
+ memset(mutable_suffix_and_value, 0, suffix_and_value_length);
+ return true;
+}
+
+bool IcingDynamicTrie::ResetNext(uint32_t next_index) {
+ Next *mutable_next =
+ this->storage_->GetMutableNextArray(next_index, /*len=*/1);
+
+ if (mutable_next == nullptr) {
+ return false;
+ }
+ ResetMutableNext(*mutable_next);
+ return true;
+}
+
+bool IcingDynamicTrie::SortNextArray(const Node *node) {
+ if (node == nullptr) {
+    // Nothing to sort; return success directly.
+ return true;
+ }
+
+ uint32_t next_array_buffer_size = 1u << node->log2_num_children();
+ Next *next_array_start = this->storage_->GetMutableNextArray(
+ node->next_index(), next_array_buffer_size);
+
+ if (next_array_start == nullptr) {
+ return false;
+ }
+
+ std::sort(next_array_start, next_array_start + next_array_buffer_size);
+ return true;
+}
+
+libtextclassifier3::Status IcingDynamicTrie::Insert(const char *key,
+ const void *value,
+ uint32_t *value_index,
+ bool replace,
+ bool *pnew_key) {
if (!is_initialized()) {
ICING_LOG(FATAL) << "DynamicTrie not initialized";
}
@@ -1520,8 +1533,7 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
if (!(storage_->nodes_left() >= 2 + key_len + 1 &&
storage_->nexts_left() >= 2 + key_len + 1 + kMaxNextArraySize &&
storage_->suffixes_left() >= key_len + 1 + value_size())) {
- // No more space left.
- return false;
+ return absl_ports::ResourceExhaustedError("No more space left");
}
uint32_t best_node_index;
@@ -1563,7 +1575,7 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
storage_->GetSuffixIndex(prev_suffix_cur + 1), value_size());
memcpy(mutable_prev_suffix_cur, value, value_size());
}
- return true;
+ return libtextclassifier3::Status::OK;
}
if (*prev_suffix_cur == *key_cur) {
@@ -1577,7 +1589,7 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
int common_len = prev_suffix_cur - prev_suffix;
for (int i = 0; i < common_len; i++) {
// Create a single-branch child node.
- Next *split_next = storage_->AllocNextArray(1);
+ ICING_ASSIGN_OR_RETURN(Next * split_next, storage_->AllocNextArray(1));
split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
split_node->set_is_leaf(false);
split_node->set_log2_num_children(0);
@@ -1589,7 +1601,7 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
}
// Fill a split.
- Next *split_next = storage_->AllocNextArray(2);
+ ICING_ASSIGN_OR_RETURN(Next * split_next, storage_->AllocNextArray(2));
split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
split_node->set_is_leaf(false);
split_node->set_log2_num_children(1);
@@ -1641,17 +1653,14 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
new_leaf_node->set_log2_num_children(0);
// Figure out the real length of the existing next array.
- Next *cur_next = storage_->GetMutableNextArray(
- best_node->next_index(), 1 << best_node->log2_num_children());
- int next_len = 0;
- for (; next_len < (1 << best_node->log2_num_children()) &&
- cur_next[next_len].node_index() != kInvalidNodeIndex;
- next_len++) {
- }
+ uint32_t next_array_buffer_size = 1u << best_node->log2_num_children();
+ Next *cur_next = storage_->GetMutableNextArray(best_node->next_index(),
+ next_array_buffer_size);
+ int next_len = GetValidNextsSize(cur_next, next_array_buffer_size);
Next *new_next = cur_next;
- if (next_len == (1 << best_node->log2_num_children())) {
+  if (next_len == next_array_buffer_size) {
// Allocate a new, larger, array.
- new_next = storage_->AllocNextArray(next_len + 1);
+ ICING_ASSIGN_OR_RETURN(new_next, storage_->AllocNextArray(next_len + 1));
memcpy(new_next, cur_next, sizeof(Next) * next_len);
}
@@ -1672,7 +1681,8 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
// 8 == log2(256)
if (log2_num_children >= 8) {
- ICING_LOG(FATAL) << "Number of children exceeds the max allowed size";
+ return absl_ports::InternalError(
+ "Number of children exceeds the max allowed size");
}
mutable_best_node->set_log2_num_children(log2_num_children + 1);
@@ -1686,7 +1696,7 @@ bool IcingDynamicTrie::Insert(const char *key, const void *value,
storage_->inc_num_keys();
if (pnew_key) *pnew_key = true;
- return true;
+ return libtextclassifier3::Status::OK;
}
const void *IcingDynamicTrie::GetValueAtIndex(uint32_t value_index) const {
@@ -1735,11 +1745,12 @@ bool IcingDynamicTrie::Find(const char *key, void *value,
}
IcingDynamicTrie::Iterator::Iterator(const IcingDynamicTrie &trie,
- const char *prefix)
+ const char *prefix, bool reverse)
: cur_key_(prefix),
cur_suffix_(nullptr),
cur_suffix_len_(0),
single_leaf_match_(false),
+ reverse_(reverse),
trie_(trie) {
if (!trie.is_initialized()) {
ICING_LOG(FATAL) << "DynamicTrie not initialized";
@@ -1748,19 +1759,29 @@ IcingDynamicTrie::Iterator::Iterator(const IcingDynamicTrie &trie,
Reset();
}
-void IcingDynamicTrie::Iterator::LeftBranchToLeaf(uint32_t node_index) {
+void IcingDynamicTrie::Iterator::BranchToLeaf(uint32_t node_index,
+ BranchType branch_type) {
// Go down the trie, following the left-most child until we hit a
// leaf. Push to stack and cur_key nodes and chars as we go.
- for (; !trie_.storage_->GetNode(node_index)->is_leaf();
- node_index =
- trie_.storage_
- ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
- ->node_index()) {
- branch_stack_.push_back(Branch(node_index));
- cur_key_.push_back(
- trie_.storage_
- ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
- ->val());
+  // When branch_type is kRightMost, the method follows the right-most child
+  // instead.
+ const Node *node = trie_.storage_->GetNode(node_index);
+ while (!node->is_leaf()) {
+ const Next *next_start = trie_.storage_->GetNext(node->next_index(), 0);
+ int child_idx;
+ if (branch_type == BranchType::kRightMost) {
+ uint32_t next_array_size = 1u << node->log2_num_children();
+ child_idx = trie_.GetValidNextsSize(next_start, next_array_size) - 1;
+ } else {
+ // node isn't a leaf. So it must have >0 children.
+ // 0 is the left-most child.
+ child_idx = 0;
+ }
+ const Next &child_next = next_start[child_idx];
+ branch_stack_.push_back(Branch(node_index, child_idx));
+ cur_key_.push_back(child_next.val());
+
+ node_index = child_next.node_index();
+ node = trie_.storage_->GetNode(node_index);
}
// We're at a leaf.
@@ -1796,7 +1817,7 @@ void IcingDynamicTrie::Iterator::Reset() {
// Two cases/states:
//
// - Found an intermediate node. If we matched all of prefix
- // (cur_key_), LeftBranchToLeaf.
+ // (cur_key_), BranchToLeaf.
//
// - Found a leaf node, which is the ONLY matching key for this
// prefix. Check that suffix matches the prefix. Then we set
@@ -1819,7 +1840,9 @@ void IcingDynamicTrie::Iterator::Reset() {
cur_suffix_len_ = strlen(cur_suffix_);
single_leaf_match_ = true;
} else if (static_cast<size_t>(key_offset) == cur_key_.size()) {
- LeftBranchToLeaf(node_index);
+ BranchType branch_type =
+ (reverse_) ? BranchType::kRightMost : BranchType::kLeftMost;
+ BranchToLeaf(node_index, branch_type);
}
}
@@ -1846,19 +1869,25 @@ bool IcingDynamicTrie::Iterator::Advance() {
while (!branch_stack_.empty()) {
Branch *branch = &branch_stack_.back();
const Node *node = trie_.storage_->GetNode(branch->node_idx);
- branch->child_idx++;
- if (branch->child_idx < (1 << node->log2_num_children()) &&
- trie_.storage_->GetNext(node->next_index(), branch->child_idx)
- ->node_index() != kInvalidNodeIndex) {
- // Successfully incremented to the next child. Update the char
- // value at this depth.
- cur_key_[cur_key_.size() - 1] =
- trie_.storage_->GetNext(node->next_index(), branch->child_idx)->val();
- // We successfully found a sub-trie to explore.
- LeftBranchToLeaf(
- trie_.storage_->GetNext(node->next_index(), branch->child_idx)
- ->node_index());
- return true;
+ if (reverse_) {
+ branch->child_idx--;
+ } else {
+ branch->child_idx++;
+ }
+ if (branch->child_idx >= 0 &&
+ branch->child_idx < (1 << node->log2_num_children())) {
+ const Next *child_next =
+ trie_.storage_->GetNext(node->next_index(), branch->child_idx);
+ if (child_next->node_index() != kInvalidNodeIndex) {
+ // Successfully incremented to the next child. Update the char
+ // value at this depth.
+ cur_key_[cur_key_.size() - 1] = child_next->val();
+ // We successfully found a sub-trie to explore.
+ BranchType branch_type =
+ (reverse_) ? BranchType::kRightMost : BranchType::kLeftMost;
+ BranchToLeaf(child_next->node_index(), branch_type);
+ return true;
+ }
}
branch_stack_.pop_back();
cur_key_.resize(cur_key_.size() - 1);
@@ -2047,22 +2076,34 @@ const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
return found;
}
+int IcingDynamicTrie::GetValidNextsSize(
+ const IcingDynamicTrie::Next *next_array_start,
+ int next_array_length) const {
+ // Only searching for key char 0xff is not sufficient, as 0xff can be a valid
+ // character. We must also specify kInvalidNodeIndex as the target node index
+ // when searching the next array.
+ return LowerBound(next_array_start, next_array_start + next_array_length,
+ /*key_char=*/0xff, /*node_index=*/kInvalidNodeIndex) -
+ next_array_start;
+}
+
const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
- const Next *start, const Next *end, uint8_t key_char) const {
+ const Next *start, const Next *end, uint8_t key_char,
+ uint32_t node_index) const {
// Above this value will use binary search instead of linear
// search. 16 was chosen from running some benchmarks with
// different values.
static const uint32_t kBinarySearchCutoff = 16;
+ Next key_next(key_char, node_index);
if (end - start >= kBinarySearchCutoff) {
// Binary search.
- Next key_next(key_char, 0);
return lower_bound(start, end, key_next);
} else {
// Linear search.
const Next *found;
for (found = start; found < end; found++) {
- if (found->val() >= key_char) {
+ if (!(*found < key_next)) {
// Should have gotten match.
break;
}
@@ -2072,7 +2113,8 @@ const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
}
void IcingDynamicTrie::FindBestNode(const char *key, uint32_t *best_node_index,
- int *key_offset, bool prefix) const {
+ int *key_offset, bool prefix,
+ bool utf8) const {
// Find the best node such that:
//
// - If key is NOT in the trie, key[0..key_offset) is a prefix to
@@ -2093,6 +2135,8 @@ void IcingDynamicTrie::FindBestNode(const char *key, uint32_t *best_node_index,
const Node *cur_node = storage_->GetRootNode();
const char *cur_key = key;
+ const Node *utf8_node = cur_node;
+ const char *utf8_key = cur_key;
while (!cur_node->is_leaf()) {
const Next *found = GetNextByChar(cur_node, *cur_key);
if (!found) break;
@@ -2108,12 +2152,136 @@ void IcingDynamicTrie::FindBestNode(const char *key, uint32_t *best_node_index,
break;
}
cur_key++;
+
+ if (utf8 && i18n_utils::IsLeadUtf8Byte(*cur_key)) {
+ utf8_node = cur_node;
+ utf8_key = cur_key;
+ }
+ }
+
+ if (utf8) {
+ // Rewind.
+ cur_node = utf8_node;
+ cur_key = utf8_key;
}
*best_node_index = storage_->GetNodeIndex(cur_node);
*key_offset = reinterpret_cast<const char *>(cur_key) - key;
}
+int IcingDynamicTrie::FindNewBranchingPrefixLength(const char *key,
+ bool utf8) const {
+ if (storage_->empty()) {
+ return kNoBranchFound;
+ }
+
+ uint32_t best_node_index;
+ int key_offset;
+ FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true, utf8);
+ const Node *cur_node = storage_->GetNode(best_node_index);
+ const char *cur_key = key + key_offset;
+ if (cur_node->is_leaf()) {
+ // Prefix in the trie. Split at leaf.
+ const char *prev_suffix = storage_->GetSuffix(cur_node->next_index());
+ while (*prev_suffix != '\0' && *prev_suffix == *cur_key) {
+ prev_suffix++;
+ cur_key++;
+ }
+
+ // Equal strings? No branching.
+ if (*prev_suffix == '\0' && *cur_key == '\0') {
+ return kNoBranchFound;
+ }
+
+ if (utf8) {
+ // Rewind to utf8 boundary.
+ size_t offset = i18n_utils::SafeTruncateUtf8Length(key, cur_key - key);
+ cur_key = key + offset;
+ }
+
+ return cur_key - key;
+ } else if (cur_node->log2_num_children() == 0) {
+ // Intermediate node going from no branching to branching.
+ return cur_key - key;
+ }
+
+ // If we've reached this point, then we're already at a branch point. So there
+ // is no *new* branch point.
+ return kNoBranchFound;
+}
+
+std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key,
+ bool utf8) const {
+ std::vector<int> prefix_lengths;
+
+ if (storage_->empty()) {
+ return prefix_lengths;
+ }
+
+ const Node *cur_node = storage_->GetRootNode();
+ const char *cur_key = key;
+ while (*cur_key && !cur_node->is_leaf()) {
+ // Branching prefix?
+ if (cur_node->log2_num_children() > 0) {
+ int len = cur_key - key;
+ if (utf8) {
+ // Do not cut mid-utf8. Walk up to utf8 boundary.
+ len = i18n_utils::SafeTruncateUtf8Length(key, len);
+ if (prefix_lengths.empty() || len != prefix_lengths.back()) {
+ prefix_lengths.push_back(len);
+ }
+ } else {
+ prefix_lengths.push_back(len);
+ }
+ }
+
+ // Move to next.
+ const Next *found = GetNextByChar(cur_node, *cur_key);
+ if (found == nullptr) {
+ break;
+ }
+ cur_node = storage_->GetNode(found->node_index());
+
+ ++cur_key;
+ }
+ return prefix_lengths;
+}
+
+bool IcingDynamicTrie::IsBranchingTerm(const char *key) const {
+ if (!is_initialized()) {
+ ICING_LOG(FATAL) << "DynamicTrie not initialized";
+ }
+
+ if (storage_->empty()) {
+ return false;
+ }
+
+ uint32_t best_node_index;
+ int key_offset;
+ FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true);
+ const Node *cur_node = storage_->GetNode(best_node_index);
+
+ if (cur_node->is_leaf()) {
+ return false;
+ }
+
+ // There is no intermediate node for key in the trie.
+ if (key[key_offset] != '\0') {
+ return false;
+ }
+
+ // Found key as an intermediate node, but key is not a valid term stored in
+ // the trie. In this case, we need at least two children for key to be a
+ // branching term.
+ if (GetNextByChar(cur_node, '\0') == nullptr) {
+ return cur_node->log2_num_children() >= 1;
+ }
+
+ // The intermediate node for key must have more than two children for key to
+ // be a branching term, one of which represents the leaf node for key itself.
+ return cur_node->log2_num_children() > 1;
+}
+
void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
Stats stats;
CollectStats(&stats);
@@ -2123,8 +2291,7 @@ void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
vector<std::string> files;
if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
&files)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
+ ICING_LOG(ERROR) << "Could not get files at prefix " << property_bitmaps_prefix_;
return;
}
for (size_t i = 0; i < files.size(); i++) {
@@ -2196,8 +2363,7 @@ IcingFlashBitmap *IcingDynamicTrie::OpenOrCreatePropertyBitmap(
}
if (property_id > kMaxPropertyId) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Property id %u out of range", property_id);
+ ICING_LOG(ERROR) << "Property id " << property_id << " out of range";
return nullptr;
}
@@ -2248,6 +2414,121 @@ bool IcingDynamicTrie::ClearDeleted(uint32_t value_index) {
return deleted_bitmap_->SetBit(idx, false);
}
+// Steps:
+// 1. Find the key in the trie.
+// 2. Remove the suffix and the value.
+// 3. Reset the nexts that point to the nodes to be removed.
+// 4. Sort any next array if needed.
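+//
+// Illustrative walk-through (hypothetical contents): deleting "bed" from a
+// trie holding {"bad", "be", "bed"} zeroes out the suffix and value of the
+// "bed" leaf, resets the next in the "be" node that pointed to that leaf, and
+// then re-sorts (and, here, shrinks) the "be" node's next array.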
+bool IcingDynamicTrie::Delete(std::string_view key) {
+ if (!is_initialized()) {
+ ICING_LOG(ERROR) << "DynamicTrie not initialized";
+ return false;
+ }
+
+ if (storage_->empty()) {
+ // Nothing to delete.
+ return true;
+ }
+
+ // Tries to find the key in the trie, starting from the root.
+ const Node *current_node = storage_->GetRootNode();
+
+ // The node after which we start to remove data.
+ const Node *last_multichild_node = nullptr;
+
+ // While visiting the trie nodes, we store the indices of Nexts that point
+ // to all the nodes after last_multichild_node. Those nodes must be
+ // consecutive and all have only one child. Resetting those Nexts means that
+ // we remove the data of the key.
+ std::vector<uint32_t> nexts_to_reset;
+ nexts_to_reset.reserve(key.length());
+
+ // Iterates through chars in the key, finds nodes in the trie until a leaf
+ // node is reached. The max number of loops is key.length() + 1 because we
+ // start from the root.
+ for (size_t i = 0; i <= key.length(); ++i) {
+ if (current_node->is_leaf()) {
+ // Leaf node, now check the suffix.
+ if (key.substr(i) != storage_->GetSuffix(current_node->next_index())) {
+ // Key does not exist in the trie, nothing to delete.
+ return true;
+ }
+ // Otherwise, key is found.
+ break;
+ }
+
+ // Finds the next char.
+ const Next *next;
+ if (i == key.length()) {
+ // When we're at the end of the key, the next char is the termination char
+ // '\0'.
+ next = GetNextByChar(current_node, '\0');
+ } else {
+ next = GetNextByChar(current_node, key[i]);
+ }
+
+ if (next == nullptr) {
+ // Key does not exist in the trie, nothing to delete.
+ return true;
+ }
+
+ // Checks the real size of next array.
+ uint32_t next_array_buffer_size = 1u << current_node->log2_num_children();
+ Next *next_array_start = storage_->GetMutableNextArray(
+ current_node->next_index(), next_array_buffer_size);
+ int valid_next_array_size =
+ GetValidNextsSize(next_array_start, next_array_buffer_size);
+ if (valid_next_array_size == 0) {
+ // Key does not exist in the trie, nothing to delete.
+ // This shouldn't happen, but we put a sanity check here in case something
+ // is wrong.
+ return true;
+ } else if (valid_next_array_size == 1) {
+ // Single-child branch will be deleted.
+ nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
+ } else {
+ // We see a new node with multiple children, all the previously seen nodes
+ // shouldn't be removed.
+ last_multichild_node = current_node;
+ nexts_to_reset.clear();
+ nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
+ }
+
+ // Updates current_node.
+ current_node = storage_->GetNode(next->node_index());
+ }
+ // Now we've found the key in the trie.
+
+ ClearSuffixAndValue(current_node->next_index());
+
+ // Resets nexts to remove key information.
+ for (uint32_t next_index : nexts_to_reset) {
+ ResetNext(next_index);
+ }
+
+ if (last_multichild_node != nullptr) {
+ SortNextArray(last_multichild_node);
+ uint32_t next_array_buffer_size =
+ 1u << last_multichild_node->log2_num_children();
+ Next *next_array_start = this->storage_->GetMutableNextArray(
+ last_multichild_node->next_index(), next_array_buffer_size);
+ uint32_t num_children =
+ GetValidNextsSize(next_array_start, next_array_buffer_size);
+    // Shrink the next array if the remaining children exactly fill half of
+    // the buffer.
+ if (num_children == next_array_buffer_size / 2) {
+ Node *mutable_node = storage_->GetMutableNode(
+ storage_->GetNodeIndex(last_multichild_node));
+ mutable_node->set_log2_num_children(mutable_node->log2_num_children() -
+ 1);
+ // Add the unused second half of the next array to the free list.
+ storage_->FreeNextArray(next_array_start + next_array_buffer_size / 2,
+ mutable_node->log2_num_children());
+ }
+ }
+
+ return true;
+}
+
bool IcingDynamicTrie::ClearPropertyForAllValues(uint32_t property_id) {
if (!is_initialized()) {
ICING_LOG(FATAL) << "DynamicTrie not initialized";
@@ -2255,8 +2536,7 @@ bool IcingDynamicTrie::ClearPropertyForAllValues(uint32_t property_id) {
PropertyReadersAll readers(*this);
if (!readers.Exists(property_id)) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Properties for id %u don't exist", property_id);
+ ICING_VLOG(1) << "Properties for id " << property_id << " don't exist";
return true;
}
diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h
index 7136ef8..18748d7 100644
--- a/icing/legacy/index/icing-dynamic-trie.h
+++ b/icing/legacy/index/icing-dynamic-trie.h
@@ -35,13 +35,14 @@
#ifndef ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_
#define ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_
-#include <stdint.h>
-
+#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/legacy/core/icing-compat.h"
#include "icing/legacy/core/icing-packed-pod.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -153,8 +154,13 @@ class IcingDynamicTrie : public IIcingStorage {
uint32_t max_nodes;
// Count of intermediate nodes.
uint32_t num_intermediates;
+ // Total and maximum number of children of intermediate nodes.
+ uint32_t sum_children, max_children;
+
// Count of leaf nodes.
uint32_t num_leaves;
+ // Total and maximum depth of leaf nodes.
+ uint32_t sum_depth, max_depth;
// Next stats
@@ -187,6 +193,7 @@ class IcingDynamicTrie : public IIcingStorage {
uint32_t dirty_pages_nexts;
uint32_t dirty_pages_suffixes;
+ // TODO(b/222349894) Convert the string output to a protocol buffer instead.
std::string DumpStats(int verbosity) const;
};
@@ -288,6 +295,16 @@ class IcingDynamicTrie : public IIcingStorage {
// Empty out the trie without closing or removing.
void Clear();
+ // Clears the suffix and value at the given index. Returns true on success.
+ bool ClearSuffixAndValue(uint32_t suffix_value_index);
+
+ // Resets the next at the given index so that it points to no node.
+ // Returns true on success.
+ bool ResetNext(uint32_t next_index);
+
+ // Sorts the next array of the node. Returns true on success.
+ bool SortNextArray(const Node *node);
+
// Sync to disk.
bool Sync() override;
@@ -297,23 +314,6 @@ class IcingDynamicTrie : public IIcingStorage {
// Potentially about to get nuked.
void OnSleep() override;
- // Compact trie into out for value indices present in old_tvi_to_new_value.
- class NewValueMap {
- public:
- virtual ~NewValueMap();
-
- // Returns the new value we want to assign to the entry at old
- // value index. We don't take ownership of the pointer.
- virtual const void *GetNewValue(uint32_t old_value_index) const = 0;
- };
- // Compacts this trie. This drops all deleted keys, drops all keys for which
- // old_tvi_to_new_value returns nullptr, updates values to be the values
- // returned by old_tvi_to_new_value, rewrites tvis, and saves the results into
-  // the trie given in 'out'. 'old_to_new_tvi' is populated with a mapping of
- // old value_index to new value_index.
- bool Compact(const NewValueMap &old_tvi_to_new_value, IcingDynamicTrie *out,
- std::unordered_map<uint32_t, uint32_t> *old_to_new_tvi) const;
-
// Insert value at key. If key already exists and replace == true,
// replaces old value with value. We take a copy of value.
//
@@ -321,18 +321,22 @@ class IcingDynamicTrie : public IIcingStorage {
// value_index. This can then be used with SetValueAtIndex
// below. value_index is not valid past a Clear/Read/Write.
//
- // Returns false if there is no space left in the trie.
- //
// REQUIRES: value a buffer of size value_size()
- bool Insert(const char *key, const void *value) {
+ //
+ // Returns:
+ // OK on success
+ // RESOURCE_EXHAUSTED if no disk space is available
+ // INTERNAL_ERROR if there are inconsistencies in the dynamic trie.
+ libtextclassifier3::Status Insert(const char *key, const void *value) {
return Insert(key, value, nullptr, true, nullptr);
}
- bool Insert(const char *key, const void *value, uint32_t *value_index,
- bool replace) {
+ libtextclassifier3::Status Insert(const char *key, const void *value,
+ uint32_t *value_index, bool replace) {
return Insert(key, value, value_index, replace, nullptr);
}
- bool Insert(const char *key, const void *value, uint32_t *value_index,
- bool replace, bool *pnew_key);
+ libtextclassifier3::Status Insert(const char *key, const void *value,
+ uint32_t *value_index, bool replace,
+ bool *pnew_key);
// Get a value returned by Insert value_index. This points to the
// value in the trie. The pointer is immutable and always valid
@@ -375,6 +379,23 @@ class IcingDynamicTrie : public IIcingStorage {
bool is_full_match() const { return value_index != kInvalidValueIndex; }
};
+ static constexpr int kNoBranchFound = -1;
+  // Returns the length of the prefix at which a new branch would be created
+  // if key were inserted. If utf8 is true, does not cut key mid-utf8. Returns
+  // kNoBranchFound if no new branch would be created.
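+  //
+  // Illustrative example: if the trie holds only "abcd", then
+  // FindNewBranchingPrefixLength("abce", /*utf8=*/false) returns 3, because
+  // inserting "abce" would create a new branch after the prefix "abc".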
+ int FindNewBranchingPrefixLength(const char *key, bool utf8) const;
+
+  // Finds the lengths of all prefixes of key at which the trie branches,
+  // excluding the key itself. If utf8 is true, does not cut key mid-utf8.
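+  //
+  // Illustrative example: if the trie holds {"abc", "abd", "ac"}, then
+  // FindBranchingPrefixLengths("abcd", /*utf8=*/false) returns {1, 2}, since
+  // the trie branches after the prefixes "a" and "ab".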
+ std::vector<int> FindBranchingPrefixLengths(const char *key, bool utf8) const;
+
+  // Checks whether key is a branching term.
+  //
+  // key is a branching term if and only if there exist terms s1 and s2 in the
+  // trie such that key is the longest common prefix of s1 and s2, but s1 and
+  // s2 are not prefixes of each other.
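+  //
+  // Illustrative example: in a trie holding {"abc", "abd"}, "ab" is a
+  // branching term (the longest common prefix of "abc" and "abd"), but in a
+  // trie holding {"ab", "abc"} it is not, because "ab" is a prefix of "abc".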
+ bool IsBranchingTerm(const char *key) const;
+
void GetDebugInfo(int verbosity, std::string *out) const override;
double min_free_fraction() const;
@@ -402,6 +423,10 @@ class IcingDynamicTrie : public IIcingStorage {
// Clears the deleted property for each value.
bool ClearDeleted(uint32_t value_index);
+  // Deletes the entry associated with the key. Data cannot be recovered after
+  // the deletion. Returns true on success.
+ bool Delete(std::string_view key);
+
// Clear a specific property id from all values. For each value that has this
// property cleared, also check to see if it was the only property set; if
// so, set the deleted property for the value to indicate it no longer has any
@@ -479,9 +504,13 @@ class IcingDynamicTrie : public IIcingStorage {
// Not thread-safe.
//
// Change in underlying trie invalidates iterator.
+ //
+ // TODO(b/241784804): change IcingDynamicTrie::Iterator to follow the common
+ // iterator pattern in our codebase.
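+  //
+  // Illustrative usage sketch: iterate everything under "ab" in reverse
+  // order.
+  //   for (IcingDynamicTrie::Iterator it(trie, "ab", /*reverse=*/true);
+  //        it.IsValid(); it.Advance()) {
+  //     // ... it.GetKey(), it.GetValue() ...
+  //   }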
class Iterator {
public:
- Iterator(const IcingDynamicTrie &trie, const char *prefix);
+ Iterator(const IcingDynamicTrie &trie, const char *prefix,
+ bool reverse = false);
void Reset();
bool Advance();
@@ -498,9 +527,10 @@ class IcingDynamicTrie : public IIcingStorage {
Iterator();
// Copy is ok.
- // Helper function that takes the left-most branch down
- // intermediate nodes to a leaf.
- void LeftBranchToLeaf(uint32_t node_index);
+ enum class BranchType { kLeftMost = 0, kRightMost = 1 };
+ // Helper function that takes the left-most or the right-most branch down
+ // intermediate nodes to a leaf, based on branch_type.
+ void BranchToLeaf(uint32_t node_index, BranchType branch_type);
std::string cur_key_;
const char *cur_suffix_;
@@ -509,10 +539,12 @@ class IcingDynamicTrie : public IIcingStorage {
uint32_t node_idx;
int child_idx;
- explicit Branch(uint32_t ni) : node_idx(ni), child_idx(0) {}
+ explicit Branch(uint32_t node_index, int child_index)
+ : node_idx(node_index), child_idx(child_index) {}
};
std::vector<Branch> branch_stack_;
bool single_leaf_match_;
+ bool reverse_;
const IcingDynamicTrie &trie_;
};
@@ -569,24 +601,27 @@ class IcingDynamicTrie : public IIcingStorage {
class CandidateSet;
// For testing only.
+ friend class IcingDynamicTrieTest_TrieShouldRespectLimits_Test;
friend class IcingDynamicTrieTest_SyncErrorRecovery_Test;
friend class IcingDynamicTrieTest_BitmapsClosedWhenInitFails_Test;
void GetHeader(IcingDynamicTrieHeader *hdr) const;
void SetHeader(const IcingDynamicTrieHeader &new_hdr);
- static const uint32_t kInvalidNodeIndex;
- static const uint32_t kInvalidNextIndex;
static const uint32_t kInvalidSuffixIndex;
// Stats helpers.
- void CollectStatsRecursive(const Node &node, Stats *stats) const;
+ void CollectStatsRecursive(const Node &node, Stats *stats,
+ uint32_t depth = 0) const;
// Helpers for Find and Insert.
const Next *GetNextByChar(const Node *node, uint8_t key_char) const;
- const Next *LowerBound(const Next *start, const Next *end,
- uint8_t key_char) const;
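+  // Returns the first entry in [start, end) that does not compare less than
+  // (key_char, node_index); Next entries order by val and then by node_index.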
+ const Next *LowerBound(const Next *start, const Next *end, uint8_t key_char,
+ uint32_t node_index = 0) const;
+ // Returns the number of valid nexts in the array.
+ int GetValidNextsSize(const IcingDynamicTrie::Next *next_array_start,
+ int next_array_length) const;
void FindBestNode(const char *key, uint32_t *best_node_index, int *key_offset,
- bool prefix) const;
+ bool prefix, bool utf8 = false) const;
// For value properties. This truncates the data by clearing it, but leaving
// the storage intact.
diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc
new file mode 100644
index 0000000..ec7e277
--- /dev/null
+++ b/icing/legacy/index/icing-dynamic-trie_test.cc
@@ -0,0 +1,1450 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-dynamic-trie.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using testing::ContainerEq;
+using testing::ElementsAre;
+using testing::StrEq;
+
+constexpr std::string_view kKeys[] = {
+ "", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg",
+};
+constexpr uint32_t kNumKeys = ABSL_ARRAYSIZE(kKeys);
+
+class IcingDynamicTrieTest : public ::testing::Test {
+ protected:
+ // Output trie stats to stderr.
+ static void StatsDump(const IcingDynamicTrie& trie) {
+ IcingDynamicTrie::Stats stats;
+ trie.CollectStats(&stats);
+ DLOG(INFO) << "Stats:\n" << stats.DumpStats(true);
+ }
+
+ static void AddToTrie(IcingDynamicTrie* trie, uint32_t num_keys) {
+ std::string key;
+    for (uint32_t i = 0; i < num_keys; i++) {
+ key.clear();
+ IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i);
+ ASSERT_THAT(trie->Insert(key.c_str(), &i), IsOk());
+ }
+ }
+
+ static void CheckTrie(const IcingDynamicTrie& trie, uint32_t num_keys) {
+ std::string key;
+    for (uint32_t i = 0; i < num_keys; i++) {
+ key.clear();
+ IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i);
+ uint32_t val;
+ bool found = trie.Find(key.c_str(), &val);
+ EXPECT_TRUE(found);
+ EXPECT_EQ(i, val);
+ }
+ }
+
+ static void PrintTrie(const IcingDynamicTrie& trie) {
+ std::vector<std::string> keys;
+ std::ostringstream os;
+ DLOG(INFO) << "Trie:\n";
+ trie.DumpTrie(&os, &keys);
+ DLOG(INFO) << os.str();
+ }
+
+ void SetUp() override {
+ trie_files_dir_ = GetTestTempDir() + "/trie_files";
+ trie_files_prefix_ = trie_files_dir_ + "/test_file_";
+ }
+
+ void TearDown() override {
+ IcingFilesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(trie_files_dir_.c_str());
+ }
+
+ std::string trie_files_dir_;
+ std::string trie_files_prefix_;
+};
+
+std::vector<std::pair<std::string, int>> RetrieveKeyValuePairs(
+ IcingDynamicTrie::Iterator& term_iter) {
+ std::vector<std::pair<std::string, int>> key_value;
+ for (; term_iter.IsValid(); term_iter.Advance()) {
+ uint32_t val;
+ memcpy(&val, term_iter.GetValue(), sizeof(val));
+ key_value.push_back(std::make_pair(term_iter.GetKey(), val));
+ }
+ return key_value;
+}
+
+constexpr std::string_view kCommonEnglishWords[] = {
+ "that", "was", "for", "on", "are", "with", "they", "be", "at",
+ "one", "have", "this", "from", "word", "but", "what", "some", "you",
+ "had", "the", "and", "can", "out", "other", "were", "which", "their",
+ "time", "will", "how", "said", "each", "tell", "may", "three"};
+constexpr uint32_t kCommonEnglishWordArrayLen =
+    ABSL_ARRAYSIZE(kCommonEnglishWords);
+
+TEST_F(IcingDynamicTrieTest, Simple) {
+ // Test simple key insertions.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ EXPECT_EQ(trie.size(), kNumKeys);
+
+ StatsDump(trie);
+ std::vector<std::string> keys;
+ std::ostringstream os;
+ DLOG(INFO) << "Trie:\n";
+ trie.DumpTrie(&os, &keys);
+ DLOG(INFO) << os.str();
+ EXPECT_EQ(keys.size(), kNumKeys);
+}
+
+TEST_F(IcingDynamicTrieTest, Init) {
+ // Test create/init behavior.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ EXPECT_FALSE(trie.is_initialized());
+ EXPECT_FALSE(trie.Init());
+
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ EXPECT_TRUE(trie.Init());
+ EXPECT_TRUE(trie.is_initialized());
+}
+
+TEST_F(IcingDynamicTrieTest, Iterator) {
+ // Test iterator.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
+ }
+
+ // Should get the entire trie.
+ std::vector<std::pair<std::string, int>> exp_key_values = {
+ {"", 0}, {"ab", 1}, {"abbb", 7}, {"abcdefg", 8}, {"abd", 3},
+ {"ac", 2}, {"bac", 4}, {"bacd", 6}, {"bb", 5}};
+ IcingDynamicTrie::Iterator it_all(trie, "");
+ std::vector<std::pair<std::string, int>> key_values =
+ RetrieveKeyValuePairs(it_all);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it_all.Reset();
+ key_values = RetrieveKeyValuePairs(it_all);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Get everything under "a".
+ exp_key_values = {
+ {"ab", 1}, {"abbb", 7}, {"abcdefg", 8}, {"abd", 3}, {"ac", 2}};
+ IcingDynamicTrie::Iterator it1(trie, "a");
+ key_values = RetrieveKeyValuePairs(it1);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it1.Reset();
+ key_values = RetrieveKeyValuePairs(it1);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Now "b".
+ exp_key_values = {{"bac", 4}, {"bacd", 6}, {"bb", 5}};
+ IcingDynamicTrie::Iterator it2(trie, "b");
+ key_values = RetrieveKeyValuePairs(it2);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it2.Reset();
+ key_values = RetrieveKeyValuePairs(it2);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Get everything under "ab".
+ exp_key_values = {{"ab", 1}, {"abbb", 7}, {"abcdefg", 8}, {"abd", 3}};
+ IcingDynamicTrie::Iterator it3(trie, "ab");
+ key_values = RetrieveKeyValuePairs(it3);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it3.Reset();
+ key_values = RetrieveKeyValuePairs(it3);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should match only one key exactly.
+ constexpr std::string_view kOneMatch[] = {
+ "abd",
+ "abcd",
+ "abcdef",
+ "abcdefg",
+ };
+ // With the following match:
+ constexpr std::string_view kOneMatchMatched[] = {
+ "abd",
+ "abcdefg",
+ "abcdefg",
+ "abcdefg",
+ };
+
+ for (size_t k = 0; k < ABSL_ARRAYSIZE(kOneMatch); k++) {
+ IcingDynamicTrie::Iterator it_single(trie, kOneMatch[k].data());
+ ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
+ EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
+ EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
+ EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
+
+ // Should get same results after calling Reset
+ it_single.Reset();
+ ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
+ EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
+ EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
+ EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
+ }
+
+ // Matches nothing.
+ constexpr std::string_view kNoMatch[] = {
+ "abbd",
+ "abcdeg",
+ "abcdefh",
+ };
+ for (size_t k = 0; k < ABSL_ARRAYSIZE(kNoMatch); k++) {
+ IcingDynamicTrie::Iterator it_empty(trie, kNoMatch[k].data());
+ EXPECT_FALSE(it_empty.IsValid());
+ it_empty.Reset();
+ EXPECT_FALSE(it_empty.IsValid());
+ }
+
+ // Clear.
+ trie.Clear();
+ EXPECT_FALSE(IcingDynamicTrie::Iterator(trie, "").IsValid());
+ EXPECT_EQ(0u, trie.size());
+ EXPECT_EQ(1.0, trie.min_free_fraction());
+}
+
+TEST_F(IcingDynamicTrieTest, IteratorReverse) {
+ // Test the reverse iterator.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
+ }
+
+ // Should get the entire trie.
+ std::vector<std::pair<std::string, int>> exp_key_values = {
+ {"bb", 5}, {"bacd", 6}, {"bac", 4}, {"ac", 2}, {"abd", 3},
+ {"abcdefg", 8}, {"abbb", 7}, {"ab", 1}, {"", 0}};
+ IcingDynamicTrie::Iterator it_all(trie, "", /*reverse=*/true);
+ std::vector<std::pair<std::string, int>> key_values =
+ RetrieveKeyValuePairs(it_all);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+ it_all.Reset();
+ key_values = RetrieveKeyValuePairs(it_all);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Get everything under "a".
+ exp_key_values = {
+ {"ac", 2}, {"abd", 3}, {"abcdefg", 8}, {"abbb", 7}, {"ab", 1}};
+ IcingDynamicTrie::Iterator it1(trie, "a", /*reverse=*/true);
+ key_values = RetrieveKeyValuePairs(it1);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it1.Reset();
+ key_values = RetrieveKeyValuePairs(it1);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Now "b".
+ exp_key_values = {{"bb", 5}, {"bacd", 6}, {"bac", 4}};
+ IcingDynamicTrie::Iterator it2(trie, "b", /*reverse=*/true);
+ key_values = RetrieveKeyValuePairs(it2);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it2.Reset();
+ key_values = RetrieveKeyValuePairs(it2);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Get everything under "ab".
+ exp_key_values = {{"abd", 3}, {"abcdefg", 8}, {"abbb", 7}, {"ab", 1}};
+ IcingDynamicTrie::Iterator it3(trie, "ab", /*reverse=*/true);
+ key_values = RetrieveKeyValuePairs(it3);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should get same results after calling Reset
+ it3.Reset();
+ key_values = RetrieveKeyValuePairs(it3);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Should match only one key exactly.
+ constexpr std::string_view kOneMatch[] = {
+ "abd",
+ "abcd",
+ "abcdef",
+ "abcdefg",
+ };
+ // With the following match:
+ constexpr std::string_view kOneMatchMatched[] = {
+ "abd",
+ "abcdefg",
+ "abcdefg",
+ "abcdefg",
+ };
+
+ for (size_t k = 0; k < ABSL_ARRAYSIZE(kOneMatch); k++) {
+ IcingDynamicTrie::Iterator it_single(trie, kOneMatch[k].data(),
+ /*reverse=*/true);
+ ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
+ EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
+ EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
+ EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
+
+ // Should get same results after calling Reset
+ it_single.Reset();
+ ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
+ EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
+ EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
+ EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
+ }
+
+ // Matches nothing.
+ constexpr std::string_view kNoMatch[] = {
+ "abbd",
+ "abcdeg",
+ "abcdefh",
+ };
+ for (size_t k = 0; k < ABSL_ARRAYSIZE(kNoMatch); k++) {
+ IcingDynamicTrie::Iterator it_empty(trie, kNoMatch[k].data(),
+ /*reverse=*/true);
+ EXPECT_FALSE(it_empty.IsValid());
+ it_empty.Reset();
+ EXPECT_FALSE(it_empty.IsValid());
+ }
+
+ // Clear.
+ trie.Clear();
+ EXPECT_FALSE(
+ IcingDynamicTrie::Iterator(trie, "", /*reverse=*/true).IsValid());
+ EXPECT_EQ(0u, trie.size());
+ EXPECT_EQ(1.0, trie.min_free_fraction());
+}
+
+TEST_F(IcingDynamicTrieTest, IteratorLoadTest) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ std::default_random_engine random;
+ ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed;
+
+ std::vector<std::pair<std::string, int>> exp_key_values;
+ // Randomly generate 1024 terms.
+ for (int i = 0; i < 1024; ++i) {
+ std::string term = RandomString("abcdefg", 5, &random) + std::to_string(i);
+ ASSERT_THAT(trie.Insert(term.c_str(), &i), IsOk());
+ exp_key_values.push_back(std::make_pair(term, i));
+ }
+ // Lexicographically sort the expected keys.
+ std::sort(exp_key_values.begin(), exp_key_values.end());
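+ // (The iterator is expected to visit keys in lexicographic order, and the
+ // appended index makes every term unique, so sorting the pairs by term is
+ // enough to match it.)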
+
+ // Check that the iterator works.
+ IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ std::vector<std::pair<std::string, int>> key_values =
+ RetrieveKeyValuePairs(term_iter);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Check that Reset works.
+ term_iter.Reset();
+ key_values = RetrieveKeyValuePairs(term_iter);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ std::reverse(exp_key_values.begin(), exp_key_values.end());
+ // Check that the reverse iterator works.
+ IcingDynamicTrie::Iterator term_iter_reverse(trie, /*prefix=*/"",
+ /*reverse=*/true);
+ key_values = RetrieveKeyValuePairs(term_iter_reverse);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+
+ // Check that Reset works.
+ term_iter_reverse.Reset();
+ key_values = RetrieveKeyValuePairs(term_iter_reverse);
+ EXPECT_THAT(key_values, ContainerEq(exp_key_values));
+}
+
+TEST_F(IcingDynamicTrieTest, Persistence) {
+ // Test persistence on the English dictionary.
+ IcingFilesystem filesystem;
+ {
+ // Test with a trie containing the strings in kCommonEnglishWords. The
+ // test will fail if the words are not unique.
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ EXPECT_FALSE(trie.Init());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &i), IsOk());
+ }
+ // Explicitly omit sync.
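+ // Since nothing was synced, the trie reopened in the next block is
+ // expected to come up empty, and the words are inserted again.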
+
+ StatsDump(trie);
+ }
+
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Init());
+ EXPECT_EQ(0U, trie.size());
+
+ for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &i), IsOk());
+ }
+ trie.Sync();
+
+ StatsDump(trie);
+ }
+
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Init());
+
+ // Make sure we can find everything with the right value.
+ uint32_t found_count = 0;
+ uint32_t matched_count = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ uint32_t val;
+ bool found = trie.Find(kCommonEnglishWords[i].data(), &val);
+ if (found) {
+ found_count++;
+ if (i == val) {
+ matched_count++;
+ }
+ }
+ }
+ EXPECT_EQ(found_count, kCommonEnglishWordArrayLen);
+ EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen);
+
+ StatsDump(trie);
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, PersistenceShared) {
+ // Test persistence on the English dictionary.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie::RuntimeOptions ropt;
+
+ {
+ // Test with a trie containing the strings in kCommonEnglishWords. The
+ // test will fail if the words are not unique.
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
+ IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
+ EXPECT_FALSE(trie.Init());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t next_reopen = kCommonEnglishWordArrayLen / 16;
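+ // The reopen points grow geometrically (len/16, then x1.5 each time), so
+ // the trie is CRC-updated, closed, and re-initialized several times while
+ // it is being built.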
+ for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &i), IsOk());
+
+ if (i == next_reopen) {
+ ASSERT_NE(0u, trie.UpdateCrc());
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+
+ next_reopen += next_reopen / 2;
+ }
+ }
+ // Explicitly omit sync; the map-shared storage policy should persist
+ // automatically.
+
+ StatsDump(trie);
+ }
+
+ // Go back and forth between the two policies.
+ for (int i = 0; i < 5; i++) {
+ if (i % 2 == 0) {
+ DLOG(INFO) << "Opening with map shared";
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
+ } else {
+ DLOG(INFO) << "Opening with explicit flush";
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kExplicitFlush;
+ }
+ IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
+ ASSERT_TRUE(trie.Init());
+
+ // Make sure we can find everything with the right value.
+ uint32_t found_count = 0;
+ uint32_t matched_count = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ uint32_t val;
+ bool found = trie.Find(kCommonEnglishWords[i].data(), &val);
+ if (found) {
+ found_count++;
+ if (i == val) {
+ matched_count++;
+ }
+ }
+ }
+ EXPECT_EQ(found_count, kCommonEnglishWordArrayLen);
+ EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen);
+
+ StatsDump(trie);
+ }
+
+ // Clear and re-open.
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
+ IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
+ ASSERT_TRUE(trie.Init());
+ trie.Clear();
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+}
+
+TEST_F(IcingDynamicTrieTest, Sync) {
+ IcingFilesystem filesystem;
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ StatsDump(trie);
+ PrintTrie(trie);
+
+ trie.Sync();
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+ }
+
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ StatsDump(trie);
+ PrintTrie(trie);
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, LimitsZero) {
+ // Don't crash if we set limits to 0.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_FALSE(trie.CreateIfNotExist(IcingDynamicTrie::Options(0, 0, 0, 0)));
+}
+
+TEST_F(IcingDynamicTrieTest, LimitsSmall) {
+ // Test limits with a few keys.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(
+ IcingDynamicTrie::Options(10, 300, 30, sizeof(uint32_t))));
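+ // I.e. at most 10 nodes, 300 nexts, 30 bytes of suffixes, and 4-byte
+ // values (same parameter order as the annotated Options calls in
+ // TrieShouldRespectLimits below).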
+ ASSERT_TRUE(trie.Init());
+
+ ASSERT_LT(3U, kNumKeys);
+
+ for (uint32_t i = 0; i < 3; i++) {
+ ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk()) << i;
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ uint32_t val = 3;
+ EXPECT_THAT(trie.Insert(kKeys[3].data(), &val),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ StatsDump(trie);
+ PrintTrie(trie);
+}
+
+TEST_F(IcingDynamicTrieTest, DISABLEDFingerprintedKeys) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie::Options options(4 << 20, 4 << 20, 20 << 20,
+ sizeof(uint32_t));
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(options));
+ ASSERT_TRUE(trie.Init());
+ IcingDynamicTrie triefp(trie_files_prefix_ + ".fps",
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(triefp.CreateIfNotExist(options));
+ ASSERT_TRUE(triefp.Init());
+
+ static const uint32_t kNumKeys = 1000000;
+ std::string key;
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ key.clear();
+ IcingStringUtil::SStringAppendF(
+ &key, 1000, "content://gmail-ls/account/conversation/%u/message/%u", i,
+ 10 * i);
+ ASSERT_THAT(trie.Insert(key.c_str(), &i), IsOk());
+
+ // Now compute a fingerprint.
+ uint64_t fpkey = tc3farmhash::Fingerprint64(key);
+
+ // Convert to base 255, since keys in the trie cannot contain the byte 0.
+ uint8_t fpkey_base255[9];
+ for (int j = 0; j < 8; j++) {
+ fpkey_base255[j] = (fpkey % 255) + 1;
+ fpkey /= 255;
+ }
+ fpkey_base255[8] = '\0';
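+ // Note that 255^8 is slightly less than 2^64, so this encoding drops the
+ // topmost bits of the fingerprint; collisions remain unlikely at this
+ // scale.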
+ ASSERT_THAT(triefp.Insert(reinterpret_cast<const char*>(fpkey_base255), &i),
+ IsOk());
+
+ // Sync periodically to gauge write locality.
+ if ((i + 1) % (kNumKeys / 10) == 0) {
+ DLOG(INFO) << "Trie sync";
+ trie.Sync();
+ DLOG(INFO) << "Trie fp sync";
+ triefp.Sync();
+ }
+ }
+
+ DLOG(INFO) << "Trie stats";
+ StatsDump(trie);
+ DLOG(INFO) << "Trie fp stats";
+ StatsDump(triefp);
+}
+
+TEST_F(IcingDynamicTrieTest, AddDups) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kNumKeys = 5000;
+ AddToTrie(&trie, kNumKeys);
+ CheckTrie(trie, kNumKeys);
+
+ DLOG(INFO) << "Trie stats";
+ StatsDump(trie);
+
+ AddToTrie(&trie, kNumKeys);
+ CheckTrie(trie, kNumKeys);
+ DLOG(INFO) << "Trie stats";
+ StatsDump(trie);
+}
+
+TEST_F(IcingDynamicTrieTest, Properties) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kOne = 1;
+ uint32_t val_idx;
+ ICING_ASSERT_OK(trie.Insert("abcd", &kOne, &val_idx, false));
+ trie.SetProperty(val_idx, 0);
+ trie.SetProperty(val_idx, 3);
+
+ {
+ IcingDynamicTrie::PropertyReader reader(trie, 3);
+ ASSERT_TRUE(reader.Exists());
+ EXPECT_TRUE(reader.HasProperty(val_idx));
+ EXPECT_FALSE(reader.HasProperty(1000));
+ }
+
+ // Disappear after close.
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+ {
+ IcingDynamicTrie::PropertyReader reader(trie, 3);
+ EXPECT_FALSE(reader.HasProperty(val_idx));
+ }
+
+ // Persist after sync.
+ ICING_ASSERT_OK(trie.Insert("abcd", &kOne, &val_idx, false));
+ trie.SetProperty(val_idx, 1);
+ ASSERT_TRUE(trie.Sync());
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t val;
+ ASSERT_TRUE(trie.Find("abcd", &val, &val_idx));
+ EXPECT_EQ(1u, val);
+ {
+ IcingDynamicTrie::PropertyReader reader(trie, 1);
+ EXPECT_TRUE(reader.HasProperty(val_idx));
+ }
+
+ // Get all.
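+ // PropertyReadersAll apparently spans ids 0 through the highest id ever
+ // set; ids 0, 1, and 3 were set above, so there are four slots and id 2
+ // never existed.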
+ {
+ IcingDynamicTrie::PropertyReadersAll readers(trie);
+ ASSERT_EQ(4u, readers.size());
+ EXPECT_TRUE(readers.Exists(0));
+ EXPECT_TRUE(readers.Exists(1));
+ EXPECT_FALSE(readers.Exists(2));
+ EXPECT_TRUE(readers.Exists(3));
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, ClearSingleProperty) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kOne = 1;
+ uint32_t val_idx[3];
+ ICING_ASSERT_OK(trie.Insert("abcd", &kOne, &val_idx[0], false));
+ trie.SetProperty(val_idx[0], 0);
+ trie.SetProperty(val_idx[0], 3);
+
+ ICING_ASSERT_OK(trie.Insert("efgh", &kOne, &val_idx[1], false));
+ trie.SetProperty(val_idx[1], 0);
+ trie.SetProperty(val_idx[1], 3);
+
+ ICING_ASSERT_OK(trie.Insert("ijkl", &kOne, &val_idx[2], false));
+ trie.SetProperty(val_idx[2], 0);
+ trie.SetProperty(val_idx[2], 3);
+
+ {
+ IcingDynamicTrie::PropertyReadersAll readers(trie);
+ ASSERT_EQ(4u, readers.size());
+ EXPECT_TRUE(readers.Exists(0));
+ EXPECT_FALSE(readers.Exists(1));
+ EXPECT_FALSE(readers.Exists(2));
+ EXPECT_TRUE(readers.Exists(3));
+ for (size_t i = 0; i < readers.size(); i++) {
+ if (readers.Exists(i)) {
+ for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) {
+ EXPECT_TRUE(readers.HasProperty(i, val_idx[j]));
+ }
+ }
+ }
+ }
+
+ EXPECT_TRUE(trie.ClearPropertyForAllValues(3));
+
+ {
+ IcingDynamicTrie::PropertyReadersAll readers(trie);
+ ASSERT_EQ(4u, readers.size());
+ EXPECT_TRUE(readers.Exists(0));
+ EXPECT_FALSE(readers.Exists(1));
+ EXPECT_FALSE(readers.Exists(2));
+ // Clearing property 3 for all values deletes its bitmap entirely, so it
+ // no longer exists.
+ EXPECT_FALSE(readers.Exists(3));
+ for (size_t i = 0; i < readers.size(); i++) {
+ for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) {
+ if (i == 0) {
+ EXPECT_TRUE(readers.HasProperty(i, val_idx[j]));
+ } else {
+ EXPECT_FALSE(readers.HasProperty(i, val_idx[j]));
+ }
+ }
+ }
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWhenRootIsLeaf) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts a key, the root is a leaf.
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("foo", &value), IsOk());
+ ASSERT_TRUE(trie.Find("foo", &value));
+
+ // Deletes the key.
+ EXPECT_TRUE(trie.Delete("foo"));
+ EXPECT_FALSE(trie.Find("foo", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWhenLastCharIsLeaf) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts "bar" and "ba", the trie structure looks like:
+ // root
+ // |
+ // b
+ // |
+ // a
+ // / \
+ // null r
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("bar", &value), IsOk());
+ ASSERT_THAT(trie.Insert("ba", &value), IsOk());
+ ASSERT_TRUE(trie.Find("bar", &value));
+ ASSERT_TRUE(trie.Find("ba", &value));
+
+ // Deletes "bar". "r" is a leaf node in the trie.
+ EXPECT_TRUE(trie.Delete("bar"));
+ EXPECT_FALSE(trie.Find("bar", &value));
+ EXPECT_TRUE(trie.Find("ba", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWithTerminationNode) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts "bar" and "ba", the trie structure looks like:
+ // root
+ // |
+ // b
+ // |
+ // a
+ // / \
+ // null r
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("bar", &value), IsOk());
+ ASSERT_THAT(trie.Insert("ba", &value), IsOk());
+ ASSERT_TRUE(trie.Find("bar", &value));
+ ASSERT_TRUE(trie.Find("ba", &value));
+
+ // Deletes "ba" which is a key with termination node in the trie.
+ EXPECT_TRUE(trie.Delete("ba"));
+ EXPECT_FALSE(trie.Find("ba", &value));
+ EXPECT_TRUE(trie.Find("bar", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWithMultipleNexts) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts "ba", "bb", "bc", and "bd", the trie structure looks like:
+ // root
+ // |
+ // b
+ // / | | \
+ // a b c d
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("ba", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bb", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bc", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bd", &value), IsOk());
+ ASSERT_TRUE(trie.Find("ba", &value));
+ ASSERT_TRUE(trie.Find("bb", &value));
+ ASSERT_TRUE(trie.Find("bc", &value));
+ ASSERT_TRUE(trie.Find("bd", &value));
+
+ // Deletes "bc".
+ EXPECT_TRUE(trie.Delete("bc"));
+ EXPECT_FALSE(trie.Find("bc", &value));
+ EXPECT_TRUE(trie.Find("ba", &value));
+ EXPECT_TRUE(trie.Find("bb", &value));
+ EXPECT_TRUE(trie.Find("bd", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWithMultipleTrieBranches) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts "batter", "battle", and "bar", the trie structure looks like:
+ // root
+ // |
+ // b
+ // |
+ // a
+ // / \
+ // t r
+ // |
+ // t
+ // / \
+ // e l
+ // | |
+ // r e
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("batter", &value), IsOk());
+ ASSERT_THAT(trie.Insert("battle", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bar", &value), IsOk());
+ ASSERT_TRUE(trie.Find("batter", &value));
+ ASSERT_TRUE(trie.Find("battle", &value));
+ ASSERT_TRUE(trie.Find("bar", &value));
+
+ // Deletes "batter".
+ EXPECT_TRUE(trie.Delete("batter"));
+ EXPECT_FALSE(trie.Find("batter", &value));
+ EXPECT_TRUE(trie.Find("battle", &value));
+ EXPECT_TRUE(trie.Find("bar", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, InsertionShouldWorkAfterDeletion) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts some keys.
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("bar", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bed", &value), IsOk());
+ ASSERT_THAT(trie.Insert("foo", &value), IsOk());
+
+ // Deletes a key
+ ASSERT_TRUE(trie.Delete("bed"));
+ ASSERT_FALSE(trie.Find("bed", &value));
+
+ // Inserts after deletion
+ ASSERT_THAT(trie.Insert("bed", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bedroom", &value), IsOk());
+ EXPECT_TRUE(trie.Find("bed", &value));
+ EXPECT_TRUE(trie.Find("bedroom", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, IteratorShouldWorkAfterDeletion) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts some keys.
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("bar", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bed", &value), IsOk());
+ ASSERT_THAT(trie.Insert("foo", &value), IsOk());
+
+ // Deletes a key
+ ASSERT_TRUE(trie.Delete("bed"));
+
+ // Iterates through all keys
+ IcingDynamicTrie::Iterator iterator_all(trie, "");
+ std::vector<std::string> results;
+ for (; iterator_all.IsValid(); iterator_all.Advance()) {
+ results.emplace_back(iterator_all.GetKey());
+ }
+ EXPECT_THAT(results, ElementsAre("bar", "foo"));
+
+ // Iterates through keys that start with "b"
+ IcingDynamicTrie::Iterator iterator_b(trie, "b");
+ results.clear();
+ for (; iterator_b.IsValid(); iterator_b.Advance()) {
+ results.emplace_back(iterator_b.GetKey());
+ }
+ EXPECT_THAT(results, ElementsAre("bar"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletingNonExistingKeyShouldReturnTrue) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts some keys.
+ uint32_t value = 1;
+ ASSERT_THAT(trie.Insert("bar", &value), IsOk());
+ ASSERT_THAT(trie.Insert("bed", &value), IsOk());
+
+ // "ba" and bedroom are not keys in the trie.
+ EXPECT_TRUE(trie.Delete("ba"));
+ EXPECT_TRUE(trie.Delete("bedroom"));
+
+ // The original keys are not affected.
+ EXPECT_TRUE(trie.Find("bar", &value));
+ EXPECT_TRUE(trie.Find("bed", &value));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionResortsFullNextArray) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+ // 'f' -> [ 'a', 'j', 'o', 'u' ]
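+ // Four children exactly fill the next array, so deleting one forces the
+ // remaining entries to be re-sorted in place (hence the test name).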
+ ASSERT_THAT(trie.Insert("foul", &value), IsOk());
+ ASSERT_THAT(trie.Insert("far", &value), IsOk());
+ ASSERT_THAT(trie.Insert("fudge", &value), IsOk());
+ ASSERT_THAT(trie.Insert("fjord", &value), IsOk());
+
+ // Delete the third child
+ EXPECT_TRUE(trie.Delete("foul"));
+
+ std::vector<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.push_back(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ElementsAre("far", "fjord", "fudge"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionResortsPartiallyFilledNextArray) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+ // 'f' -> [ 'a', 'o', 'u', 0xFF ]
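+ // Only three of the four next-array slots are used; 0xFF marks the unused
+ // slot, so the array is only partially filled when "foul" is deleted.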
+ ASSERT_THAT(trie.Insert("foul", &value), IsOk());
+ ASSERT_THAT(trie.Insert("far", &value), IsOk());
+ ASSERT_THAT(trie.Insert("fudge", &value), IsOk());
+
+ // Delete the second child
+ EXPECT_TRUE(trie.Delete("foul"));
+
+ std::vector<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.push_back(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ElementsAre("far", "fudge"));
+}
+
+TEST_F(IcingDynamicTrieTest, DeletionLoadTest) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ std::default_random_engine random;
+ ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed;
+ std::vector<std::string> terms;
+ uint32_t value;
+ // Randomly generate 2048 terms.
+ for (int i = 0; i < 2048; ++i) {
+ terms.push_back(RandomString("abcdefg", 5, &random));
+ ASSERT_THAT(trie.Insert(terms.back().c_str(), &value), IsOk());
+ }
+
+ // Randomly delete 1024 terms.
+ std::unordered_set<std::string> exp_remaining(terms.begin(), terms.end());
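+ // Random 5-char terms may repeat; keeping the expected terms in a set
+ // mirrors the trie's set-like key semantics, so duplicates stay consistent
+ // across inserts and deletes.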
+ std::shuffle(terms.begin(), terms.end(), random);
+ for (int i = 0; i < 1024; ++i) {
+ exp_remaining.erase(terms[i]);
+ ASSERT_TRUE(trie.Delete(terms[i].c_str()));
+ }
+
+ // Check that the iterator still works, and the remaining terms are correct.
+ std::unordered_set<std::string> remaining;
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.insert(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ContainerEq(exp_remaining));
+
+ // Check that we can still insert terms after delete.
+ for (int i = 0; i < 2048; ++i) {
+ std::string term = RandomString("abcdefg", 5, &random);
+ ASSERT_THAT(trie.Insert(term.c_str(), &value), IsOk());
+ exp_remaining.insert(term);
+ }
+ remaining.clear();
+ for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
+ term_iter.IsValid(); term_iter.Advance()) {
+ remaining.insert(term_iter.GetKey());
+ }
+ EXPECT_THAT(remaining, ContainerEq(exp_remaining));
+}
+
+} // namespace
+
+// The tests below access private methods and fields of IcingDynamicTrie, so
+// they can't be in the anonymous namespace.
+
+TEST_F(IcingDynamicTrieTest, TrieShouldRespectLimits) {
+ // Test limits on numbers of nodes, nexts, and suffixes size.
+ IcingFilesystem filesystem;
+
+ // These three limits are just large enough to insert all of the test
+ // words except the last one.
+ uint32_t num_nodes_enough;
+ uint32_t num_nexts_enough;
+ uint32_t suffixes_size_enough;
+
+ // First, try to fill the 3 numbers above.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ // Creates a trie with ample limits on nodes, nexts, and suffix file size.
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ /*max_nodes_in=*/1000, /*max_nexts_in=*/1000,
+ /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
+ }
+
+ IcingDynamicTrieHeader header;
+ trie.GetHeader(&header);
+
+ // Before each insertion, the trie requires (2 + 1 + key_length) free
+ // nodes, so inserting the last word needs 8 nodes. Adding 7 leaves just
+ // enough room to insert every word except the last one.
+ num_nodes_enough = header.num_nodes() + 7;
+
+ // Before each insertion, the trie requires (2 + 1 + key_length +
+ // kMaxNextArraySize) free nexts, so inserting the last word needs
+ // (8 + kMaxNextArraySize) nexts. Adding (7 + kMaxNextArraySize) leaves
+ // just enough room to insert every word except the last one.
+ num_nexts_enough =
+ header.num_nexts() + 7 + IcingDynamicTrie::kMaxNextArraySize;
+
+ // Before each insertion, the trie requires (1 + key_length + value_size)
+ // free suffix bytes, so inserting the last word needs
+ // (6 + sizeof(uint32_t)) bytes. Adding (5 + sizeof(uint32_t)) leaves just
+ // enough room to insert every word except the last one.
+ suffixes_size_enough = header.suffixes_size() + 5 + sizeof(uint32_t);
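+ // For the last word ("three", key_length 5) the formulas above work out
+ // to 8 nodes, (8 + kMaxNextArraySize) nexts, and 10 suffix bytes.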
+ }
+
+ // Test a trie with just enough number of nodes.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ num_nodes_enough, /*max_nexts_in=*/1000,
+ /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
+ }
+
+ // Fails to insert the last word because there are not enough nodes left.
+ EXPECT_THAT(
+ trie.Insert(kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(),
+ &value),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+
+ // Test a trie with just enough number of nexts.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ /*max_nodes_in=*/1000, num_nexts_enough,
+ /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
+ }
+
+ // Fails to insert the last word because there are not enough nexts left.
+ EXPECT_THAT(
+ trie.Insert(kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(),
+ &value),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+
+ // Test a trie with just enough suffixes size.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ /*max_nodes_in=*/1000, /*max_nexts_in=*/1000, suffixes_size_enough,
+ sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
+ }
+
+ // Fails to insert the last word because there is not enough suffix space
+ // left.
+ EXPECT_THAT(
+ trie.Insert(kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(),
+ &value),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, SyncErrorRecovery) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kNumKeys = 5000;
+ AddToTrie(&trie, kNumKeys);
+ CheckTrie(trie, kNumKeys);
+
+ trie.Sync();
+ trie.Close();
+
+ // Reach into the header and corrupt its value_size so that Init() fails.
+ ASSERT_TRUE(trie.Init());
+ IcingDynamicTrieHeader hdr;
+ trie.GetHeader(&hdr);
+ hdr.set_value_size(hdr.value_size() + 123);
+ trie.SetHeader(hdr);
+ trie.Close();
+
+ ASSERT_FALSE(trie.Init());
+}
+
+TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) {
+ // Create trie with one property.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(
+ trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+ ASSERT_TRUE(trie.deleted_bitmap_);
+ trie.SetProperty(0, 0);
+ ASSERT_EQ(1, trie.property_bitmaps_.size());
+ ASSERT_TRUE(trie.property_bitmaps_[0]);
+ trie.Close();
+
+ // Intentionally corrupt deleted_bitmap file to make Init() fail.
+ FILE* fp = fopen(trie.deleted_bitmap_filename_.c_str(), "r+");
+ ASSERT_TRUE(fp);
+ ASSERT_EQ(16, fwrite("################", 1, 16, fp));
+ fclose(fp);
+ ASSERT_FALSE(trie.Init());
+
+ // Check that both the bitmap and the property files have been closed.
+ ASSERT_FALSE(trie.deleted_bitmap_);
+ ASSERT_EQ(0, trie.property_bitmaps_.size());
+}
+
+TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForExistingTerms) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+
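+ // Judging from the expectations below, IsBranchingTerm(t) holds iff at
+ // least two keys strictly extend t and diverge immediately after it; a
+ // key equal to t itself does not count as a branch.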
+ ASSERT_THAT(trie.Insert("", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+
+ ASSERT_THAT(trie.Insert("ab", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+
+ ASSERT_THAT(trie.Insert("ac", &value), IsOk());
+ // "" is a prefix of "ab" and "ac", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+
+ ASSERT_THAT(trie.Insert("ba", &value), IsOk());
+ // "" now branches to "ba"
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+
+ ASSERT_THAT(trie.Insert("a", &value), IsOk());
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ // "a" branches to "ab" and "ac"
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+
+ ASSERT_THAT(trie.Insert("abc", &value), IsOk());
+ ASSERT_THAT(trie.Insert("acd", &value), IsOk());
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" is a prefix of "abc", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ // "ac" is a prefix of "acd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+
+ ASSERT_THAT(trie.Insert("abcd", &value), IsOk());
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" is a prefix of "abc" and "abcd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ // "abc" is a prefix of "abcd", but it is not a branching term.
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
+
+ ASSERT_THAT(trie.Insert("abd", &value), IsOk());
+ EXPECT_TRUE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" branches to "abc" and "abd"
+ EXPECT_TRUE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ac"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ba"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("acd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abd"));
+}
+
+TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForNonExistingTerms) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t value = 1;
+
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("aa", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_FALSE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("ac", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ // "a" does not exist in the trie, but now it branches to "aa" and "ac".
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("ad", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("abcd", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_FALSE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("abd", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ // "ab" does not exist in the trie, but now it branches to "abcd" and "abd".
+ EXPECT_TRUE(trie.IsBranchingTerm("ab"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("abce", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_TRUE(trie.IsBranchingTerm("ab"));
+ // "abc" does not exist in the trie, but now it branches to "abcd" and "abce".
+ EXPECT_TRUE(trie.IsBranchingTerm("abc"));
+
+ ASSERT_THAT(trie.Insert("abc_suffix", &value), IsOk());
+ EXPECT_FALSE(trie.IsBranchingTerm(""));
+ EXPECT_TRUE(trie.IsBranchingTerm("a"));
+ EXPECT_TRUE(trie.IsBranchingTerm("ab"));
+ EXPECT_TRUE(trie.IsBranchingTerm("abc"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_s"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_su"));
+ EXPECT_FALSE(trie.IsBranchingTerm("abc_suffi"));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-filesystem.cc b/icing/legacy/index/icing-filesystem.cc
index 90e9146..fbf5a27 100644
--- a/icing/legacy/index/icing-filesystem.cc
+++ b/icing/legacy/index/icing-filesystem.cc
@@ -16,7 +16,6 @@
#include <dirent.h>
#include <dlfcn.h>
-#include <errno.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <pthread.h>
@@ -27,6 +26,7 @@
#include <unistd.h>
#include <algorithm>
+#include <cerrno>
#include <unordered_set>
#include "icing/absl_ports/str_cat.h"
@@ -65,18 +65,15 @@ void LogOpenFileDescriptors() {
constexpr int kMaxFileDescriptorsToStat = 4096;
struct rlimit rlim = {0, 0};
if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "getrlimit() failed (errno=%d)", errno);
+ ICING_LOG(ERROR) << "getrlimit() failed (errno=" << errno << ")";
return;
}
int fd_lim = rlim.rlim_cur;
if (fd_lim > kMaxFileDescriptorsToStat) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Maximum number of file descriptors (%d) too large.", fd_lim);
+ ICING_LOG(ERROR) << "Maximum number of file descriptors (" << fd_lim << ") too large.";
fd_lim = kMaxFileDescriptorsToStat;
}
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Listing up to %d file descriptors.", fd_lim);
+ ICING_LOG(ERROR) << "Listing up to " << fd_lim << " file descriptors.";
// Verify that /proc/self/fd is a directory. If not, procfs is not mounted or
// inaccessible for some other reason. In that case, there's no point trying
@@ -98,15 +95,12 @@ void LogOpenFileDescriptors() {
if (len >= 0) {
// Zero-terminate the buffer, because readlink() won't.
target[len < target_size ? len : target_size - 1] = '\0';
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> \"%s\"", fd,
- target);
+ ICING_LOG(ERROR) << "fd " << fd << " -> \"" << target << "\"";
} else if (errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> ? (errno=%d)",
- fd, errno);
+ ICING_LOG(ERROR) << "fd " << fd << " -> ? (errno=" << errno << ")";
}
}
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "File descriptor list complete.");
+ ICING_LOG(ERROR) << "File descriptor list complete.";
}
// Logs an error formatted as: desc1 + file_name + desc2 + strerror(errnum).
@@ -115,8 +109,7 @@ void LogOpenFileDescriptors() {
// file descriptors (see LogOpenFileDescriptors() above).
void LogOpenError(const char *desc1, const char *file_name, const char *desc2,
int errnum) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "%s%s%s%s", desc1, file_name, desc2, strerror(errnum));
+ ICING_LOG(ERROR) << desc1 << file_name << desc2 << strerror(errnum);
if (errnum == EMFILE) {
LogOpenFileDescriptors();
}
@@ -157,8 +150,7 @@ bool ListDirectoryInternal(const char *dir_name,
}
}
if (closedir(dir) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Error closing %s: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Error closing " << dir_name << ": " << strerror(errno);
}
return true;
}
@@ -181,12 +173,11 @@ void IcingScopedFd::reset(int fd) {
const uint64_t IcingFilesystem::kBadFileSize;
bool IcingFilesystem::DeleteFile(const char *file_name) const {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf("Deleting file %s", file_name);
+ ICING_VLOG(1) << "Deleting file " << file_name;
int ret = unlink(file_name);
bool success = (ret == 0) || (errno == ENOENT);
if (!success) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Deleting file %s failed: %s", file_name, strerror(errno));
+ ICING_LOG(ERROR) << "Deleting file " << file_name << " failed: " << strerror(errno);
}
return success;
}
@@ -195,8 +186,7 @@ bool IcingFilesystem::DeleteDirectory(const char *dir_name) const {
int ret = rmdir(dir_name);
bool success = (ret == 0) || (errno == ENOENT);
if (!success) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Deleting directory %s failed: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Deleting directory " << dir_name << " failed: " << strerror(errno);
}
return success;
}
@@ -208,8 +198,7 @@ bool IcingFilesystem::DeleteDirectoryRecursively(const char *dir_name) const {
if (errno == ENOENT) {
return true; // If directory didn't exist, this was successful.
}
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Stat %s failed: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Stat " << dir_name << " failed: " << strerror(errno);
return false;
}
vector<std::string> entries;
@@ -222,8 +211,7 @@ bool IcingFilesystem::DeleteDirectoryRecursively(const char *dir_name) const {
++i) {
std::string filename = std::string(dir_name) + '/' + *i;
if (stat(filename.c_str(), &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Stat %s failed: %s", filename.c_str(), strerror(errno));
+ ICING_LOG(ERROR) << "Stat " << filename << " failed: " << strerror(errno);
success = false;
} else if (S_ISDIR(st.st_mode)) {
success = DeleteDirectoryRecursively(filename.c_str()) && success;
@@ -246,8 +234,7 @@ bool IcingFilesystem::FileExists(const char *file_name) const {
exists = S_ISREG(st.st_mode) != 0;
} else {
if (errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to stat file %s: %s", file_name, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat file " << file_name << ": " << strerror(errno);
}
exists = false;
}
@@ -261,8 +248,7 @@ bool IcingFilesystem::DirectoryExists(const char *dir_name) const {
exists = S_ISDIR(st.st_mode) != 0;
} else {
if (errno != ENOENT) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to stat directory %s: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat directory " << dir_name << ": " << strerror(errno);
}
exists = false;
}
@@ -317,8 +303,7 @@ bool IcingFilesystem::GetMatchingFiles(const char *glob,
int basename_idx = GetBasenameIndex(glob);
if (basename_idx == 0) {
// We need a directory.
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Expected directory, no matching files for: %s", glob);
+ ICING_VLOG(1) << "Expected directory, no matching files for: " << glob;
return true;
}
const char *basename_glob = glob + basename_idx;
@@ -374,8 +359,7 @@ uint64_t IcingFilesystem::GetFileSize(int fd) const {
struct stat st;
uint64_t size = kBadFileSize;
if (fstat(fd, &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat file: " << strerror(errno);
} else {
size = st.st_size;
}
@@ -386,8 +370,7 @@ uint64_t IcingFilesystem::GetFileSize(const char *filename) const {
struct stat st;
uint64_t size = kBadFileSize;
if (stat(filename, &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to stat file %s: %s", filename, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat file " << filename << ": " << strerror(errno);
} else {
size = st.st_size;
}
@@ -399,8 +382,7 @@ bool IcingFilesystem::Truncate(int fd, uint64_t new_size) const {
if (ret == 0) {
lseek(fd, new_size, SEEK_SET);
} else {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to truncate file: %s", strerror(errno));
+ ICING_LOG(ERROR) << "Unable to truncate file: " << strerror(errno);
}
return (ret == 0);
}
@@ -418,8 +400,7 @@ bool IcingFilesystem::Truncate(const char *filename, uint64_t new_size) const {
bool IcingFilesystem::Grow(int fd, uint64_t new_size) const {
int ret = ftruncate(fd, new_size);
if (ret != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to grow file: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to grow file: " << strerror(errno);
}
return (ret == 0);
}
@@ -431,8 +412,7 @@ bool IcingFilesystem::Write(int fd, const void *data, size_t data_size) const {
size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
ssize_t wrote = write(fd, data, chunk_size);
if (wrote < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Bad write: " << strerror(errno);
return false;
}
data = static_cast<const uint8_t *>(data) + wrote;
@@ -449,8 +429,7 @@ bool IcingFilesystem::PWrite(int fd, off_t offset, const void *data,
size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
ssize_t wrote = pwrite(fd, data, chunk_size, offset);
if (wrote < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Bad write: " << strerror(errno);
return false;
}
data = static_cast<const uint8_t *>(data) + wrote;
@@ -468,8 +447,7 @@ bool IcingFilesystem::DataSync(int fd) const {
#endif
if (result < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to sync data: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to sync data: " << strerror(errno);
return false;
}
return true;
@@ -478,9 +456,7 @@ bool IcingFilesystem::DataSync(int fd) const {
bool IcingFilesystem::RenameFile(const char *old_name,
const char *new_name) const {
if (rename(old_name, new_name) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Unable to rename file %s to %s: %s", old_name, new_name,
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to rename file " << old_name << " to " << new_name << ": " << strerror(errno);
return false;
}
return true;
@@ -518,8 +494,7 @@ bool IcingFilesystem::CreateDirectory(const char *dir_name) const {
if (mkdir(dir_name, S_IRUSR | S_IWUSR | S_IXUSR) == 0) {
success = true;
} else {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Creating directory %s failed: %s", dir_name, strerror(errno));
+ ICING_LOG(ERROR) << "Creating directory " << dir_name << " failed: " << strerror(errno);
}
}
return success;
@@ -561,8 +536,7 @@ end:
if (src_fd > 0) close(src_fd);
if (dst_fd > 0) close(dst_fd);
if (!success) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Couldn't copy file %s to %s", src, dst);
+ ICING_LOG(ERROR) << "Couldn't copy file " << src << " to " << dst;
}
return success;
}
@@ -583,8 +557,7 @@ bool IcingFilesystem::ComputeChecksum(int fd, uint32_t *checksum,
uint64_t IcingFilesystem::GetDiskUsage(int fd) const {
struct stat st;
if (fstat(fd, &st) < 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat file: " << strerror(errno);
return kBadFileSize;
}
return st.st_blocks * kStatBlockSize;
@@ -593,8 +566,7 @@ uint64_t IcingFilesystem::GetDiskUsage(int fd) const {
uint64_t IcingFilesystem::GetFileDiskUsage(const char *path) const {
struct stat st;
if (stat(path, &st) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
- path, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat " << path << ": " << strerror(errno);
return kBadFileSize;
}
return st.st_blocks * kStatBlockSize;
@@ -603,8 +575,7 @@ uint64_t IcingFilesystem::GetFileDiskUsage(const char *path) const {
uint64_t IcingFilesystem::GetDiskUsage(const char *path) const {
struct stat st;
if (stat(path, &st) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
- path, strerror(errno));
+ ICING_LOG(ERROR) << "Unable to stat " << path << ": " << strerror(errno);
return kBadFileSize;
}
uint64_t result = st.st_blocks * kStatBlockSize;
diff --git a/icing/legacy/index/icing-filesystem.h b/icing/legacy/index/icing-filesystem.h
index 2b10c1c..ce75a82 100644
--- a/icing/legacy/index/icing-filesystem.h
+++ b/icing/legacy/index/icing-filesystem.h
@@ -17,13 +17,15 @@
#ifndef ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
#define ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
+#include <sys/types.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
#include <memory>
#include <string>
#include <unordered_set>
+#include <utility>
#include <vector>
namespace icing {
@@ -222,6 +224,11 @@ class IcingFilesystem {
// Increments to_increment by size if size is valid, or sets to_increment
// to kBadFileSize if either size or to_increment is kBadFileSize.
static void IncrementByOrSetInvalid(uint64_t size, uint64_t *to_increment);
+
+ // Returns -1 if file_size is invalid. Otherwise, returns file_size.
+ static int64_t SanitizeFileSize(int64_t file_size) {
+ return (file_size != kBadFileSize) ? file_size : -1;
+ }
};
} // namespace lib
diff --git a/icing/legacy/index/icing-flash-bitmap.cc b/icing/legacy/index/icing-flash-bitmap.cc
index 56dec00..774308f 100644
--- a/icing/legacy/index/icing-flash-bitmap.cc
+++ b/icing/legacy/index/icing-flash-bitmap.cc
@@ -73,8 +73,7 @@ class IcingFlashBitmap::Accessor {
bool IcingFlashBitmap::Verify() const {
if (!is_initialized()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Can't verify unopened flash bitmap %s", filename_.c_str());
+ ICING_LOG(ERROR) << "Can't verify unopened flash bitmap " << filename_;
return false;
}
if (mmapper_ == nullptr) {
@@ -83,26 +82,21 @@ bool IcingFlashBitmap::Verify() const {
}
Accessor accessor(mmapper_.get());
if (accessor.header()->magic != kMagic) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flash bitmap %s has incorrect magic header", filename_.c_str());
+ ICING_LOG(ERROR) << "Flash bitmap " << filename_ << " has incorrect magic header";
return false;
}
if (accessor.header()->version != kCurVersion) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flash bitmap %s has incorrect version", filename_.c_str());
+ ICING_LOG(ERROR) << "Flash bitmap " << filename_ << " has incorrect version";
return false;
}
if (accessor.header()->dirty) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flash bitmap %s is dirty", filename_.c_str());
+ ICING_LOG(ERROR) << "Flash bitmap " << filename_ << " is dirty";
return false;
}
uint32_t crc =
IcingStringUtil::UpdateCrc32(0, accessor.data(), accessor.data_size());
if (accessor.header()->crc != crc) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Flash bitmap %s has incorrect CRC32 %u %u", filename_.c_str(),
- accessor.header()->crc, crc);
+ ICING_LOG(ERROR) << "Flash bitmap " << filename_ << " has incorrect CRC32 " << accessor.header()->crc << " " << crc;
return false;
}
return true;
@@ -265,17 +259,14 @@ uint32_t IcingFlashBitmap::UpdateCrc() const {
bool IcingFlashBitmap::Grow(size_t new_file_size) {
IcingScopedFd fd(filesystem_->OpenForWrite(filename_.c_str()));
if (!filesystem_->Grow(fd.get(), new_file_size)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Grow %s to new size %zu failed", filename_.c_str(), new_file_size);
+ ICING_LOG(ERROR) << "Grow " << filename_ << " to new size " << new_file_size << " failed";
return false;
}
if (!mmapper_->Remap(fd.get(), 0, new_file_size)) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Remap of %s after grow failed", filename_.c_str());
+ ICING_LOG(ERROR) << "Remap of " << filename_ << " after grow failed";
return false;
}
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Grew %s new size %zu", filename_.c_str(), new_file_size);
+ ICING_VLOG(1) << "Grew " << filename_ << " new size " << new_file_size;
Accessor accessor(mmapper_.get());
accessor.header()->dirty = true;
return true;
diff --git a/icing/legacy/index/icing-flash-bitmap.h b/icing/legacy/index/icing-flash-bitmap.h
index 3b3521a..6bb9591 100644
--- a/icing/legacy/index/icing-flash-bitmap.h
+++ b/icing/legacy/index/icing-flash-bitmap.h
@@ -37,8 +37,7 @@
#ifndef ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_
#define ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_
-#include <stdint.h>
-
+#include <cstdint>
#include <memory>
#include <string>
@@ -139,6 +138,7 @@ class IcingFlashBitmap {
// Upgrade for version 18.
bool UpgradeTo18();
+ // Legacy filesystem. Switch to the new Filesystem class instead.
const IcingFilesystem *const filesystem_;
std::string filename_;
OpenType open_type_;
diff --git a/icing/legacy/index/icing-mmapper.cc b/icing/legacy/index/icing-mmapper.cc
index 737335c..d086da2 100644
--- a/icing/legacy/index/icing-mmapper.cc
+++ b/icing/legacy/index/icing-mmapper.cc
@@ -17,10 +17,11 @@
//
#include "icing/legacy/index/icing-mmapper.h"
-#include <errno.h>
-#include <string.h>
#include <sys/mman.h>
+#include <cerrno>
+#include <cstring>
+
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/util/logging.h"
@@ -66,8 +67,7 @@ void IcingMMapper::DoMapping(int fd, uint64_t location, size_t size) {
address_ = reinterpret_cast<uint8_t *>(mmap_result_) + alignment_adjustment;
} else {
const char *errstr = strerror(errno);
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
- "Could not mmap file for reading: %s", errstr);
+ ICING_LOG(ERROR) << "Could not mmap file for reading: " << errstr;
mmap_result_ = nullptr;
}
}
@@ -94,8 +94,7 @@ IcingMMapper::~IcingMMapper() { Unmap(); }
bool IcingMMapper::Sync() {
if (is_valid() && !read_only_) {
if (msync(mmap_result_, mmap_len_, MS_SYNC) != 0) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("msync failed: %s",
- strerror(errno));
+ ICING_LOG(ERROR) << "msync failed: " << strerror(errno);
return false;
}
}
diff --git a/icing/legacy/index/icing-mmapper.h b/icing/legacy/index/icing-mmapper.h
index bf62aa5..d054c11 100644
--- a/icing/legacy/index/icing-mmapper.h
+++ b/icing/legacy/index/icing-mmapper.h
@@ -22,9 +22,11 @@
#ifndef ICING_LEGACY_INDEX_ICING_MMAPPER_H_
#define ICING_LEGACY_INDEX_ICING_MMAPPER_H_
-#include <stdint.h>
#include <unistd.h>
+#include <cstddef>
+#include <cstdint>
+
namespace icing {
namespace lib {
diff --git a/icing/legacy/index/icing-mock-filesystem.h b/icing/legacy/index/icing-mock-filesystem.h
index 31e012a..122ee7b 100644
--- a/icing/legacy/index/icing-mock-filesystem.h
+++ b/icing/legacy/index/icing-mock-filesystem.h
@@ -15,81 +15,230 @@
#ifndef ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_
#define ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
#include <memory>
#include <string>
#include <vector>
-#include "icing/legacy/index/icing-filesystem.h"
#include "gmock/gmock.h"
+#include "icing/legacy/index/icing-filesystem.h"
namespace icing {
namespace lib {
+using ::testing::_;
+using ::testing::A;
class IcingMockFilesystem : public IcingFilesystem {
public:
- MOCK_CONST_METHOD1(DeleteFile, bool(const char *file_name));
+ IcingMockFilesystem() {
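+ // By default, delegate every call to a real IcingFilesystem so that tests
+ // only need to override the specific methods they want to fake.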
+ ON_CALL(*this, DeleteFile).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.DeleteFile(file_name);
+ });
+
+ ON_CALL(*this, DeleteDirectory).WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.DeleteDirectory(dir_name);
+ });
+
+ ON_CALL(*this, DeleteDirectoryRecursively)
+ .WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.DeleteDirectoryRecursively(dir_name);
+ });
+
+ ON_CALL(*this, FileExists).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.FileExists(file_name);
+ });
+
+ ON_CALL(*this, DirectoryExists).WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.DirectoryExists(dir_name);
+ });
+
+ ON_CALL(*this, GetBasenameIndex)
+ .WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.GetBasenameIndex(file_name);
+ });
+
+ ON_CALL(*this, GetBasename).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.GetBasename(file_name);
+ });
+
+ ON_CALL(*this, GetDirname).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.GetDirname(file_name);
+ });
+
+ ON_CALL(*this, ListDirectory)
+ .WillByDefault(
+ [this](const char *dir_name, std::vector<std::string> *entries) {
+ return real_icing_filesystem_.ListDirectory(dir_name, entries);
+ });
+
+ ON_CALL(*this, GetMatchingFiles)
+ .WillByDefault(
+ [this](const char *glob, std::vector<std::string> *matches) {
+ return real_icing_filesystem_.GetMatchingFiles(glob, matches);
+ });
+
+ ON_CALL(*this, OpenForWrite).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.OpenForWrite(file_name);
+ });
+
+ ON_CALL(*this, OpenForAppend).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.OpenForAppend(file_name);
+ });
+
+ ON_CALL(*this, OpenForRead).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.OpenForRead(file_name);
+ });
+
+ ON_CALL(*this, GetFileSize(A<int>())).WillByDefault([this](int fd) {
+ return real_icing_filesystem_.GetFileSize(fd);
+ });
+
+ ON_CALL(*this, GetFileSize(A<const char *>()))
+ .WillByDefault([this](const char *filename) {
+ return real_icing_filesystem_.GetFileSize(filename);
+ });
+
+ ON_CALL(*this, Truncate(A<int>(), _))
+ .WillByDefault([this](int fd, uint64_t new_size) {
+ return real_icing_filesystem_.Truncate(fd, new_size);
+ });
+
+ ON_CALL(*this, Truncate(A<const char *>(), _))
+ .WillByDefault([this](const char *filename, uint64_t new_size) {
+ return real_icing_filesystem_.Truncate(filename, new_size);
+ });
+
+ ON_CALL(*this, Grow).WillByDefault([this](int fd, uint64_t new_size) {
+ return real_icing_filesystem_.Grow(fd, new_size);
+ });
+
+ ON_CALL(*this, Write)
+ .WillByDefault([this](int fd, const void *data, size_t data_size) {
+ return real_icing_filesystem_.Write(fd, data, data_size);
+ });
+ ON_CALL(*this, PWrite)
+ .WillByDefault(
+ [this](int fd, off_t offset, const void *data, size_t data_size) {
+ return real_icing_filesystem_.PWrite(fd, offset, data, data_size);
+ });
+
+ ON_CALL(*this, DataSync).WillByDefault([this](int fd) {
+ return real_icing_filesystem_.DataSync(fd);
+ });
+
+ ON_CALL(*this, RenameFile)
+ .WillByDefault([this](const char *old_name, const char *new_name) {
+ return real_icing_filesystem_.RenameFile(old_name, new_name);
+ });
+
+ ON_CALL(*this, SwapFiles)
+ .WillByDefault([this](const char *one, const char *two) {
+ return real_icing_filesystem_.SwapFiles(one, two);
+ });
+
+ ON_CALL(*this, CreateDirectory).WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.CreateDirectory(dir_name);
+ });
+
+ ON_CALL(*this, CreateDirectoryRecursively)
+ .WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.CreateDirectoryRecursively(dir_name);
+ });
+
+ ON_CALL(*this, CopyFile)
+ .WillByDefault([this](const char *src, const char *dst) {
+ return real_icing_filesystem_.CopyFile(src, dst);
+ });
+
+ ON_CALL(*this, ComputeChecksum)
+ .WillByDefault([this](int fd, uint32_t *checksum, uint64_t offset,
+ uint64_t length) {
+ return real_icing_filesystem_.ComputeChecksum(fd, checksum, offset,
+ length);
+ });
+
+ ON_CALL(*this, GetDiskUsage).WillByDefault([this](const char *path) {
+ return real_icing_filesystem_.GetDiskUsage(path);
+ });
+ }
+
+ MOCK_METHOD(bool, DeleteFile, (const char *file_name), (const, override));
+
+ MOCK_METHOD(bool, DeleteDirectory, (const char *dir_name), (const, override));
- MOCK_CONST_METHOD1(DeleteDirectory, bool(const char *dir_name));
+ MOCK_METHOD(bool, DeleteDirectoryRecursively, (const char *dir_name),
+ (const, override));
- MOCK_CONST_METHOD1(DeleteDirectoryRecursively, bool(const char *dir_name));
+ MOCK_METHOD(bool, FileExists, (const char *file_name), (const, override));
- MOCK_CONST_METHOD1(FileExists, bool(const char *file_name));
+ MOCK_METHOD(bool, DirectoryExists, (const char *dir_name), (const, override));
- MOCK_CONST_METHOD1(DirectoryExists, bool(const char *dir_name));
+ MOCK_METHOD(int, GetBasenameIndex, (const char *file_name),
+ (const, override));
- MOCK_CONST_METHOD1(GetBasenameIndex, int(const char *file_name));
+ MOCK_METHOD(std::string, GetBasename, (const char *file_name),
+ (const, override));
- MOCK_CONST_METHOD1(GetBasename, std::string(const char *file_name));
+ MOCK_METHOD(std::string, GetDirname, (const char *file_name),
+ (const, override));
- MOCK_CONST_METHOD1(GetDirname, std::string(const char *file_name));
+ MOCK_METHOD(bool, ListDirectory,
+ (const char *dir_name, std::vector<std::string> *entries),
+ (const, override));
- MOCK_CONST_METHOD2(ListDirectory, bool(const char *dir_name,
- std::vector<std::string> *entries));
+ MOCK_METHOD(bool, GetMatchingFiles,
+ (const char *glob, std::vector<std::string> *matches),
+ (const, override));
- MOCK_CONST_METHOD2(GetMatchingFiles,
- bool(const char *glob, std::vector<std::string> *matches));
+ MOCK_METHOD(int, OpenForWrite, (const char *file_name), (const, override));
- MOCK_CONST_METHOD1(OpenForWrite, int(const char *file_name));
+ MOCK_METHOD(int, OpenForAppend, (const char *file_name), (const, override));
- MOCK_CONST_METHOD1(OpenForAppend, int(const char *file_name));
+ MOCK_METHOD(int, OpenForRead, (const char *file_name), (const, override));
- MOCK_CONST_METHOD1(OpenForRead, int(const char *file_name));
+ MOCK_METHOD(uint64_t, GetFileSize, (int fd), (const, override));
- MOCK_CONST_METHOD1(GetFileSize, uint64_t(int fd));
+ MOCK_METHOD(uint64_t, GetFileSize, (const char *filename), (const, override));
- MOCK_CONST_METHOD1(GetFileSize, uint64_t(const char *filename));
+ MOCK_METHOD(bool, Truncate, (int fd, uint64_t new_size), (const, override));
- MOCK_CONST_METHOD2(Truncate, bool(int fd, uint64_t new_size));
+ MOCK_METHOD(bool, Truncate, (const char *filename, uint64_t new_size),
+ (const, override));
- MOCK_CONST_METHOD2(Truncate, bool(const char *filename, uint64_t new_size));
+ MOCK_METHOD(bool, Grow, (int fd, uint64_t new_size), (const, override));
- MOCK_CONST_METHOD2(Grow, bool(int fd, uint64_t new_size));
+ MOCK_METHOD(bool, Write, (int fd, const void *data, size_t data_size),
+ (const, override));
+ MOCK_METHOD(bool, PWrite,
+ (int fd, off_t offset, const void *data, size_t data_size),
+ (const, override));
- MOCK_CONST_METHOD3(Write, bool(int fd, const void *data, size_t data_size));
- MOCK_CONST_METHOD4(PWrite, bool(int fd, off_t offset, const void *data,
- size_t data_size));
+ MOCK_METHOD(bool, DataSync, (int fd), (const, override));
- MOCK_CONST_METHOD1(DataSync, bool(int fd));
+ MOCK_METHOD(bool, RenameFile, (const char *old_name, const char *new_name),
+ (const, override));
- MOCK_CONST_METHOD2(RenameFile,
- bool(const char *old_name, const char *new_name));
+ MOCK_METHOD(bool, SwapFiles, (const char *one, const char *two),
+ (const, override));
- MOCK_CONST_METHOD2(SwapFiles, bool(const char *one, const char *two));
+ MOCK_METHOD(bool, CreateDirectory, (const char *dir_name), (const, override));
- MOCK_CONST_METHOD1(CreateDirectory, bool(const char *dir_name));
+ MOCK_METHOD(bool, CreateDirectoryRecursively, (const char *dir_name),
+ (const, override));
- MOCK_CONST_METHOD1(CreateDirectoryRecursively, bool(const char *dir_name));
+ MOCK_METHOD(bool, CopyFile, (const char *src, const char *dst),
+ (const, override));
- MOCK_CONST_METHOD2(CopyFile, bool(const char *src, const char *dst));
+ MOCK_METHOD(bool, ComputeChecksum,
+ (int fd, uint32_t *checksum, uint64_t offset, uint64_t length),
+ (const, override));
- MOCK_CONST_METHOD4(ComputeChecksum, bool(int fd, uint32_t *checksum,
- uint64_t offset, uint64_t length));
+ MOCK_METHOD(uint64_t, GetDiskUsage, (const char *path), (const, override));
- MOCK_CONST_METHOD1(GetDiskUsage, uint64_t(const char *path));
+ private:
+ IcingFilesystem real_icing_filesystem_;
};
} // namespace lib
diff --git a/icing/legacy/index/icing-storage-file.cc b/icing/legacy/index/icing-storage-file.cc
index b27ec67..bbc6b81 100644
--- a/icing/legacy/index/icing-storage-file.cc
+++ b/icing/legacy/index/icing-storage-file.cc
@@ -14,9 +14,9 @@
#include "icing/legacy/index/icing-storage-file.h"
-#include <inttypes.h>
#include <unistd.h>
+#include <cinttypes>
#include <string>
#include "icing/legacy/core/icing-compat.h"
@@ -69,22 +69,18 @@ bool IcingStorageFile::Sync() {
IcingTimer timer;
if (!PreSync()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Pre-sync %s failed",
- filename_.c_str());
+ ICING_LOG(ERROR) << "Pre-sync " << filename_ << " failed";
return false;
}
if (!filesystem_->DataSync(fd_.get())) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Sync %s failed",
- filename_.c_str());
+ ICING_LOG(ERROR) << "Sync " << filename_ << " failed";
return false;
}
if (!PostSync()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Post-sync %s failed",
- filename_.c_str());
+ ICING_LOG(ERROR) << "Post-sync " << filename_ << " failed";
return false;
}
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Syncing %s took %.3fms", filename_.c_str(), timer.Elapsed() * 1000.);
+ ICING_VLOG(1) << "Syncing " << filename_ << " took " << timer.Elapsed() * 1000 << "ms";
return true;
}
diff --git a/icing/legacy/index/icing-storage.h b/icing/legacy/index/icing-storage.h
index cc06c54..58b6aa1 100644
--- a/icing/legacy/index/icing-storage.h
+++ b/icing/legacy/index/icing-storage.h
@@ -20,6 +20,7 @@
#ifndef ICING_LEGACY_INDEX_ICING_STORAGE_H_
#define ICING_LEGACY_INDEX_ICING_STORAGE_H_
+#include <cstdint>
#include <string>
namespace icing {
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
new file mode 100644
index 0000000..76e41ce
--- /dev/null
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -0,0 +1,525 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/monkey_test/icing-monkey-test-runner.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/icing-search-engine.h"
+#include "icing/monkey_test/in-memory-icing-search-engine.h"
+#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/result-state-manager.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Le;
+using ::testing::Not;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAreArray;
+
+SearchSpecProto GenerateRandomSearchSpecProto(
+ MonkeyTestRandomEngine* random,
+ MonkeyDocumentGenerator* document_generator) {
+ // Get a random token from the language set as a single term query.
+ std::string query(document_generator->GetToken());
+ std::uniform_int_distribution<> dist(0, 1);
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+ if (dist(*random) == 1) {
+ term_match_type = TermMatchType::PREFIX;
+ // Randomly drop a suffix of the query to test prefix queries.
+ std::uniform_int_distribution<> size_dist(1, query.size());
+ query.resize(size_dist(*random));
+ }
+ // 50% chance of getting a section restriction.
+ if (dist(*random) == 1) {
+ const SchemaTypeConfigProto& type_config = document_generator->GetType();
+ if (type_config.properties_size() > 0) {
+ std::uniform_int_distribution<> prop_dist(
+ 0, type_config.properties_size() - 1);
+ query = absl_ports::StrCat(
+ type_config.properties(prop_dist(*random)).property_name(), ":",
+ query);
+ }
+ }
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_query(query);
+ return search_spec;
+}
+
+ScoringSpecProto GenerateRandomScoringSpec(MonkeyTestRandomEngine* random) {
+ ScoringSpecProto scoring_spec;
+
+ constexpr std::array<ScoringSpecProto::RankingStrategy::Code, 3>
+ ranking_strategies = {
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE};
+
+ std::uniform_int_distribution<> dist(0, ranking_strategies.size() - 1);
+ scoring_spec.set_rank_by(ranking_strategies[dist(*random)]);
+ return scoring_spec;
+}
+
+ResultSpecProto::SnippetSpecProto GenerateRandomSnippetSpecProto(
+ MonkeyTestRandomEngine* random, const ResultSpecProto& result_spec) {
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+
+ std::uniform_int_distribution<> num_to_snippet_dist(
+ 0, result_spec.num_per_page() * 2);
+ snippet_spec.set_num_to_snippet(num_to_snippet_dist(*random));
+
+ std::uniform_int_distribution<> num_matches_per_property_dist(0, 10);
+ snippet_spec.set_num_matches_per_property(
+ num_matches_per_property_dist(*random));
+
+ std::uniform_int_distribution<> dist(0, 4);
+ int random_num = dist(*random);
+ // 1/5 chance of getting one of 0 (disabled), 8, 32, 128, 512
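+ // (e.g. random_num == 3 yields 1 << 7 = 128).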
+ int max_window_utf32_length =
+ random_num == 0 ? 0 : (1 << (2 * random_num + 1));
+ snippet_spec.set_max_window_utf32_length(max_window_utf32_length);
+ return snippet_spec;
+}
+
+TypePropertyMask GenerateTypePropertyMask(
+ MonkeyTestRandomEngine* random, const SchemaTypeConfigProto& type_config) {
+ TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type(type_config.schema_type());
+ for (const auto& properties : type_config.properties()) {
+ // 25% chance of adding the current property to the mask.
+ std::uniform_int_distribution<> dist(0, 3);
+ if (dist(*random) == 0) {
+ type_property_mask.add_paths(properties.property_name());
+ }
+ }
+ return type_property_mask;
+}
+
+ResultSpecProto GenerateRandomResultSpecProto(MonkeyTestRandomEngine* random,
+ const SchemaProto* schema) {
+ std::uniform_int_distribution<> dist(0, 4);
+ ResultSpecProto result_spec;
+ // 1/5 chance of getting one of 1, 4, 16, 64, 256
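+ // (e.g. dist(*random) == 2 yields 1 << 4 = 16 results per page).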
+ int num_per_page = 1 << (2 * dist(*random));
+ result_spec.set_num_per_page(num_per_page);
+ *result_spec.mutable_snippet_spec() =
+ GenerateRandomSnippetSpecProto(random, result_spec);
+
+ // 1/5 chance of enabling projection.
+ if (dist(*random) == 0) {
+ for (const SchemaTypeConfigProto& type_config : schema->types()) {
+ // 25% chance of adding the current type to the projection.
+ std::uniform_int_distribution<> dist(0, 3);
+ if (dist(*random) == 0) {
+ *result_spec.add_type_property_masks() =
+ GenerateTypePropertyMask(random, type_config);
+ }
+ }
+ }
+ return result_spec;
+}
+
+void SortDocuments(std::vector<DocumentProto>& documents) {
+ std::sort(documents.begin(), documents.end(),
+ [](const DocumentProto& doc1, const DocumentProto& doc2) {
+ if (doc1.namespace_() != doc2.namespace_()) {
+ return doc1.namespace_() < doc2.namespace_();
+ }
+ return doc1.uri() < doc2.uri();
+ });
+}
+
+} // namespace
+
+IcingMonkeyTestRunner::IcingMonkeyTestRunner(
+ IcingMonkeyTestRunnerConfiguration config)
+ : config_(std::move(config)),
+ random_(config_.seed),
+ in_memory_icing_(std::make_unique<InMemoryIcingSearchEngine>(&random_)),
+ schema_generator_(
+ std::make_unique<MonkeySchemaGenerator>(&random_, &config_)) {
+ ICING_LOG(INFO) << "Monkey test runner started with seed: " << config_.seed;
+ std::string dir = GetTestTempDir() + "/icing/monkey";
+ filesystem_.DeleteDirectoryRecursively(dir.c_str());
+ icing_dir_ = std::make_unique<DestructibleDirectory>(&filesystem_, dir);
+}
+
+void IcingMonkeyTestRunner::Run(uint32_t num) {
+ ASSERT_TRUE(icing_ != nullptr)
+ << "Icing search engine has not yet been created. Please call "
+ "Initialize() first";
+
+ uint32_t frequency_sum = 0;
+ for (const auto& schedule : config_.monkey_api_schedules) {
+ frequency_sum += schedule.second;
+ }
+ std::uniform_int_distribution<> dist(0, frequency_sum - 1);
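+ // Weighted pick: draw p uniformly from [0, frequency_sum) and walk the
+ // schedule, so each API is chosen with probability frequency/frequency_sum.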
+ for (; num; --num) {
+ int p = dist(random_);
+ for (const auto& schedule : config_.monkey_api_schedules) {
+ if (p < schedule.second) {
+ ASSERT_NO_FATAL_FAILURE(schedule.first(this));
+ break;
+ }
+ p -= schedule.second;
+ }
+ ICING_LOG(INFO) << "Completed Run #" << num
+ << ". Documents in the in-memory icing: "
+ << in_memory_icing_->GetNumAliveDocuments();
+ }
+}
+
+SetSchemaResultProto IcingMonkeyTestRunner::SetSchema(SchemaProto&& schema) {
+ in_memory_icing_->SetSchema(std::move(schema));
+ document_generator_ = std::make_unique<MonkeyDocumentGenerator>(
+ &random_, in_memory_icing_->GetSchema(), &config_);
+ return icing_->SetSchema(*in_memory_icing_->GetSchema(),
+ /*ignore_errors_and_delete_documents=*/true);
+}
+
+void IcingMonkeyTestRunner::Initialize() {
+ ASSERT_NO_FATAL_FAILURE(CreateIcingSearchEngine());
+
+ SchemaProto schema = schema_generator_->GenerateSchema();
+ ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString();
+
+ ASSERT_THAT(SetSchema(std::move(schema)).status(), ProtoIsOk());
+}
+
+void IcingMonkeyTestRunner::DoUpdateSchema() {
+ ICING_LOG(INFO) << "Monkey updating schema";
+
+ MonkeySchemaGenerator::UpdateSchemaResult result =
+ schema_generator_->UpdateSchema(*in_memory_icing_->GetSchema());
+ if (result.is_invalid_schema) {
+ SetSchemaResultProto set_schema_result =
+ icing_->SetSchema(result.schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ ASSERT_THAT(set_schema_result.status(), Not(ProtoIsOk()));
+ return;
+ }
+ ICING_LOG(DBG) << "Updating schema to: " << result.schema.DebugString();
+ SetSchemaResultProto icing_set_schema_result =
+ SetSchema(std::move(result.schema));
+ ASSERT_THAT(icing_set_schema_result.status(), ProtoIsOk());
+ ASSERT_THAT(icing_set_schema_result.deleted_schema_types(),
+ UnorderedElementsAreArray(result.schema_types_deleted));
+ ASSERT_THAT(icing_set_schema_result.incompatible_schema_types(),
+ UnorderedElementsAreArray(result.schema_types_incompatible));
+ ASSERT_THAT(
+ icing_set_schema_result.index_incompatible_changed_schema_types(),
+ UnorderedElementsAreArray(result.schema_types_index_incompatible));
+
+ // Update in-memory icing
+ for (const std::string& deleted_type : result.schema_types_deleted) {
+ ICING_ASSERT_OK(in_memory_icing_->DeleteBySchemaType(deleted_type));
+ }
+ for (const std::string& incompatible_type :
+ result.schema_types_incompatible) {
+ ICING_ASSERT_OK(in_memory_icing_->DeleteBySchemaType(incompatible_type));
+ }
+}
+
+void IcingMonkeyTestRunner::DoGet() {
+ InMemoryIcingSearchEngine::PickDocumentResult document =
+ in_memory_icing_->RandomPickDocument(/*p_alive=*/0.70, /*p_all=*/0.28,
+ /*p_other=*/0.02);
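+ // Mostly pick alive documents, but occasionally pick deleted or never-put
+ // ones so that both the OK and NOT_FOUND paths below are exercised.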
+ ICING_LOG(INFO) << "Monkey getting namespace: " << document.name_space
+ << ", uri: " << document.uri;
+ GetResultProto get_result =
+ icing_->Get(document.name_space, document.uri,
+ GetResultSpecProto::default_instance());
+ if (document.document.has_value()) {
+ ASSERT_THAT(get_result.status(), ProtoIsOk())
+ << "Cannot find the document that is supposed to exist.";
+ ASSERT_THAT(get_result.document(), EqualsProto(document.document.value()))
+ << "The document found does not match with the value in the in-memory "
+ "icing.";
+ } else {
+ // No document should have been found.
+ if (get_result.status().code() != StatusProto::NOT_FOUND) {
+ if (get_result.status().code() == StatusProto::OK) {
+ FAIL() << "Found a document that is not supposed to be found.";
+ }
+ FAIL() << "Icing search engine failure (code "
+ << get_result.status().code()
+ << "): " << get_result.status().message();
+ }
+ }
+}
+
+void IcingMonkeyTestRunner::DoGetAllNamespaces() {
+ ICING_LOG(INFO) << "Monkey getting all namespaces";
+ GetAllNamespacesResultProto get_result = icing_->GetAllNamespaces();
+ ASSERT_THAT(get_result.status(), ProtoIsOk());
+ ASSERT_THAT(get_result.namespaces(),
+ UnorderedElementsAreArray(in_memory_icing_->GetAllNamespaces()));
+}
+
+void IcingMonkeyTestRunner::DoPut() {
+ MonkeyTokenizedDocument doc = document_generator_->GenerateDocument();
+ ICING_LOG(INFO) << "Monkey document generated, namespace: "
+ << doc.document.namespace_()
+ << ", uri: " << doc.document.uri();
+ ICING_LOG(DBG) << doc.document.DebugString();
+ in_memory_icing_->Put(doc);
+ ASSERT_THAT(icing_->Put(doc.document).status(), ProtoIsOk());
+}
+
+void IcingMonkeyTestRunner::DoDelete() {
+ InMemoryIcingSearchEngine::PickDocumentResult document =
+ in_memory_icing_->RandomPickDocument(/*p_alive=*/0.70, /*p_all=*/0.2,
+ /*p_other=*/0.1);
+ ICING_LOG(INFO) << "Monkey deleting namespace: " << document.name_space
+ << ", uri: " << document.uri;
+ DeleteResultProto delete_result =
+ icing_->Delete(document.name_space, document.uri);
+ if (document.document.has_value()) {
+ ICING_ASSERT_OK(
+ in_memory_icing_->Delete(document.name_space, document.uri));
+ ASSERT_THAT(delete_result.status(), ProtoIsOk())
+ << "Cannot delete an existing document.";
+ } else {
+ // No document should have been deleted.
+ if (delete_result.status().code() != StatusProto::NOT_FOUND) {
+ if (delete_result.status().code() == StatusProto::OK) {
+ FAIL() << "Deleted a non-existing document without an error.";
+ }
+ FAIL() << "Icing search engine failure (code "
+ << delete_result.status().code()
+ << "): " << delete_result.status().message();
+ }
+ }
+}
+
+void IcingMonkeyTestRunner::DoDeleteByNamespace() {
+ std::string name_space = document_generator_->GetNamespace();
+ ICING_LOG(INFO) << "Monkey deleting namespace: " << name_space;
+ DeleteByNamespaceResultProto delete_result =
+ icing_->DeleteByNamespace(name_space);
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t num_docs_deleted,
+ in_memory_icing_->DeleteByNamespace(name_space));
+ if (num_docs_deleted != 0) {
+ ASSERT_THAT(delete_result.status(), ProtoIsOk())
+ << "Cannot delete an existing namespace.";
+ ASSERT_THAT(delete_result.delete_stats().num_documents_deleted(),
+ Eq(num_docs_deleted));
+ } else {
+ // No document should have been deleted.
+ if (delete_result.status().code() != StatusProto::NOT_FOUND) {
+ if (delete_result.status().code() == StatusProto::OK) {
+ FAIL() << "Deleted a non-existing namespace without an error.";
+ }
+ FAIL() << "Icing search engine failure (code "
+ << delete_result.status().code()
+ << "): " << delete_result.status().message();
+ }
+ }
+}
+
+void IcingMonkeyTestRunner::DoDeleteBySchemaType() {
+ std::string schema_type = document_generator_->GetType().schema_type();
+ ICING_LOG(INFO) << "Monkey deleting type: " << schema_type;
+ DeleteBySchemaTypeResultProto delete_result =
+ icing_->DeleteBySchemaType(schema_type);
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t num_docs_deleted,
+ in_memory_icing_->DeleteBySchemaType(schema_type));
+ if (num_docs_deleted != 0) {
+ ASSERT_THAT(delete_result.status(), ProtoIsOk())
+ << "Cannot delete an existing schema type.";
+ ASSERT_THAT(delete_result.delete_stats().num_documents_deleted(),
+ Eq(num_docs_deleted));
+ } else {
+ // No document should have been deleted.
+ if (delete_result.status().code() != StatusProto::NOT_FOUND) {
+ if (delete_result.status().code() == StatusProto::OK) {
+ FAIL() << "Deleted a non-existing schema type without an error.";
+ }
+ FAIL() << "Icing search engine failure (code "
+ << delete_result.status().code()
+ << "): " << delete_result.status().message();
+ }
+ }
+}
+
+void IcingMonkeyTestRunner::DoDeleteByQuery() {
+ SearchSpecProto search_spec =
+ GenerateRandomSearchSpecProto(&random_, document_generator_.get());
+ ICING_LOG(INFO) << "Monkey deleting by query: " << search_spec.query();
+ DeleteByQueryResultProto delete_result = icing_->DeleteByQuery(search_spec);
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t num_docs_deleted,
+ in_memory_icing_->DeleteByQuery(search_spec));
+ if (num_docs_deleted != 0) {
+ ASSERT_THAT(delete_result.status(), ProtoIsOk())
+ << "Cannot delete documents that matches with the query.";
+ ASSERT_THAT(delete_result.delete_by_query_stats().num_documents_deleted(),
+ Eq(num_docs_deleted));
+ } else {
+ // No document should have been deleted.
+ if (delete_result.status().code() != StatusProto::NOT_FOUND) {
+ if (delete_result.status().code() == StatusProto::OK) {
+ FAIL() << "Deleted documents that should not match with the query "
+ "without an error.";
+ }
+ FAIL() << "Icing search engine failure (code "
+ << delete_result.status().code()
+ << "): " << delete_result.status().message();
+ }
+ }
+ ICING_LOG(INFO)
+ << delete_result.delete_by_query_stats().num_documents_deleted()
+ << " documents deleted by query.";
+}
+
+void IcingMonkeyTestRunner::DoSearch() {
+ std::unique_ptr<SearchSpecProto> search_spec =
+ std::make_unique<SearchSpecProto>(
+ GenerateRandomSearchSpecProto(&random_, document_generator_.get()));
+ std::unique_ptr<ScoringSpecProto> scoring_spec =
+ std::make_unique<ScoringSpecProto>(GenerateRandomScoringSpec(&random_));
+ std::unique_ptr<ResultSpecProto> result_spec =
+ std::make_unique<ResultSpecProto>(GenerateRandomResultSpecProto(
+ &random_, in_memory_icing_->GetSchema()));
+ const ResultSpecProto::SnippetSpecProto snippet_spec =
+ result_spec->snippet_spec();
+ bool is_projection_enabled = !result_spec->type_property_masks().empty();
+
+ ICING_LOG(INFO) << "Monkey searching by query: " << search_spec->query()
+ << ", term_match_type: " << search_spec->term_match_type();
+ ICING_VLOG(1) << "search_spec:\n" << search_spec->DebugString();
+ ICING_VLOG(1) << "scoring_spec:\n" << scoring_spec->DebugString();
+ ICING_VLOG(1) << "result_spec:\n" << result_spec->DebugString();
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentProto> exp_documents,
+ in_memory_icing_->Search(*search_spec));
+
+ SearchResultProto search_result =
+ icing_->Search(*search_spec, *scoring_spec, *result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+
+ // Delete all of the specs used in the search. GetNextPage should have no
+ // problem because it shouldn't be keeping any references to them.
+ search_spec.reset();
+ scoring_spec.reset();
+ result_spec.reset();
+
+ std::vector<DocumentProto> actual_documents;
+ int num_snippeted = 0;
+ while (true) {
+ for (const SearchResultProto::ResultProto& doc : search_result.results()) {
+ actual_documents.push_back(doc.document());
+ if (!doc.snippet().entries().empty()) {
+ ++num_snippeted;
+ for (const SnippetProto::EntryProto& entry : doc.snippet().entries()) {
+ ASSERT_THAT(entry.snippet_matches(),
+ SizeIs(Le(snippet_spec.num_matches_per_property())));
+ }
+ }
+ }
+ if (search_result.next_page_token() == kInvalidNextPageToken) {
+ break;
+ }
+ search_result = icing_->GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ }
+ // The maximum number of scored documents allowed in Icing is 30000; beyond
+ // that, we are not able to compare the results with the in-memory Icing.
+ if (exp_documents.size() >= 30000) {
+ return;
+ }
+ if (snippet_spec.num_matches_per_property() > 0 && !is_projection_enabled) {
+ ASSERT_THAT(num_snippeted,
+ Eq(std::min<uint32_t>(exp_documents.size(),
+ snippet_spec.num_to_snippet())));
+ }
+ SortDocuments(exp_documents);
+ SortDocuments(actual_documents);
+ ASSERT_THAT(actual_documents, SizeIs(exp_documents.size()));
+ for (int i = 0; i < exp_documents.size(); ++i) {
+ if (is_projection_enabled) {
+ ASSERT_THAT(actual_documents[i].namespace_(),
+ Eq(exp_documents[i].namespace_()));
+ ASSERT_THAT(actual_documents[i].uri(), Eq(exp_documents[i].uri()));
+ continue;
+ }
+ ASSERT_THAT(actual_documents[i], EqualsProto(exp_documents[i]));
+ }
+ ICING_LOG(INFO) << exp_documents.size() << " documents found by query.";
+}
+
+void IcingMonkeyTestRunner::ReloadFromDisk() {
+ ICING_LOG(INFO) << "Monkey reloading from disk";
+ // Destruct the icing search engine by resetting the unique pointer.
+ icing_.reset();
+ ASSERT_NO_FATAL_FAILURE(CreateIcingSearchEngine());
+}
+
+void IcingMonkeyTestRunner::DoOptimize() {
+ ICING_LOG(INFO) << "Monkey doing optimization";
+ ASSERT_THAT(icing_->Optimize().status(), ProtoIsOk());
+}
+
+void IcingMonkeyTestRunner::CreateIcingSearchEngine() {
+ std::uniform_int_distribution<> dist(0, 1);
+
+ bool always_rebuild_index_optimize = dist(random_);
+ float optimize_rebuild_index_threshold =
+ always_rebuild_index_optimize ? 0.0 : 0.9;
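+ // With probability 1/2, the threshold is 0.0 so that Optimize() always
+ // rebuilds the index; otherwise the high threshold of 0.9 makes rebuilds
+ // rare.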
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_index_merge_size(config_.index_merge_size);
+ icing_options.set_base_dir(icing_dir_->dir());
+ icing_options.set_optimize_rebuild_index_threshold(
+ optimize_rebuild_index_threshold);
+ // This method is called every time we ReloadFromDisk(), so randomly flip
+ // this flag to test the document store's compatibility.
+ icing_options.set_document_store_namespace_id_fingerprint(
+ static_cast<bool>(dist(random_)));
+ icing_ = std::make_unique<IcingSearchEngine>(icing_options);
+ ASSERT_THAT(icing_->Initialize().status(), ProtoIsOk());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/monkey_test/icing-monkey-test-runner.h b/icing/monkey_test/icing-monkey-test-runner.h
new file mode 100644
index 0000000..10be60c
--- /dev/null
+++ b/icing/monkey_test/icing-monkey-test-runner.h
@@ -0,0 +1,79 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_ICING_MONKEY_TEST_RUNNER_H_
+#define ICING_MONKEY_TEST_ICING_MONKEY_TEST_RUNNER_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/monkey_test/in-memory-icing-search-engine.h"
+#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+class IcingMonkeyTestRunner {
+ public:
+ explicit IcingMonkeyTestRunner(IcingMonkeyTestRunnerConfiguration config);
+ IcingMonkeyTestRunner(const IcingMonkeyTestRunner&) = delete;
+ IcingMonkeyTestRunner& operator=(const IcingMonkeyTestRunner&) = delete;
+
+ SetSchemaResultProto SetSchema(SchemaProto&& schema);
+
+ // This function must be called before running the monkey test, and should
+ // only be called once.
+ void Initialize();
+
+ // Run the monkey test with num operations.
+ void Run(uint32_t num);
+
+ // APIs supported in icing search engine.
+ void DoUpdateSchema();
+ void DoGet();
+ void DoGetAllNamespaces();
+ void DoPut();
+ void DoDelete();
+ void DoDeleteByNamespace();
+ void DoDeleteBySchemaType();
+ void DoDeleteByQuery();
+ void DoSearch();
+
+ // Operations with no observable side-effects.
+ void ReloadFromDisk();
+ void DoOptimize();
+
+ private:
+ IcingMonkeyTestRunnerConfiguration config_;
+ MonkeyTestRandomEngine random_;
+ Filesystem filesystem_;
+ std::unique_ptr<DestructibleDirectory> icing_dir_;
+ std::unique_ptr<InMemoryIcingSearchEngine> in_memory_icing_;
+ std::unique_ptr<IcingSearchEngine> icing_;
+
+ std::unique_ptr<MonkeySchemaGenerator> schema_generator_;
+ std::unique_ptr<MonkeyDocumentGenerator> document_generator_;
+
+ void CreateIcingSearchEngine();
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_ICING_MONKEY_TEST_RUNNER_H_
diff --git a/icing/monkey_test/icing-search-engine_monkey_test.cc b/icing/monkey_test/icing-search-engine_monkey_test.cc
new file mode 100644
index 0000000..436e27b
--- /dev/null
+++ b/icing/monkey_test/icing-search-engine_monkey_test.cc
@@ -0,0 +1,99 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <random>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "icing/monkey_test/icing-monkey-test-runner.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/schema/section.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+TEST(IcingSearchEngineMonkeyTest, MonkeyTest) {
+ IcingMonkeyTestRunnerConfiguration config(
+ /*seed=*/std::random_device()(),
+ /*num_types=*/30,
+ /*num_namespaces=*/100,
+ /*num_uris=*/1000,
+ /*index_merge_size=*/1024 * 1024);
+ config.possible_num_properties = {0,
+ 1,
+ 2,
+ 4,
+ 8,
+ 16,
+ kTotalNumSections / 2,
+ kTotalNumSections,
+ kTotalNumSections + 1,
+ kTotalNumSections * 2};
+ config.possible_num_tokens_ = {0, 1, 4, 16, 64, 256};
+ config.monkey_api_schedules = {
+ {&IcingMonkeyTestRunner::DoPut, 500},
+ {&IcingMonkeyTestRunner::DoSearch, 200},
+ {&IcingMonkeyTestRunner::DoGet, 70},
+ {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
+ {&IcingMonkeyTestRunner::DoDelete, 50},
+ {&IcingMonkeyTestRunner::DoDeleteByNamespace, 50},
+ {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 45},
+ {&IcingMonkeyTestRunner::DoDeleteByQuery, 20},
+ {&IcingMonkeyTestRunner::DoOptimize, 5},
+ {&IcingMonkeyTestRunner::DoUpdateSchema, 5},
+ {&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
+ uint32_t num_iterations = IsAndroidArm() ? 1000 : 5000;
+ IcingMonkeyTestRunner runner(std::move(config));
+ ASSERT_NO_FATAL_FAILURE(runner.Initialize());
+ ASSERT_NO_FATAL_FAILURE(runner.Run(num_iterations));
+}
+
+TEST(DISABLED_IcingSearchEngineMonkeyTest, MonkeyManyDocTest) {
+ IcingMonkeyTestRunnerConfiguration config(
+ /*seed=*/std::random_device()(),
+ /*num_types=*/30,
+ /*num_namespaces=*/200,
+ /*num_uris=*/100000,
+ /*index_merge_size=*/1024 * 1024);
+
+ // Due to the large number of documents, we need to make each document
+ // smaller to finish the test.
+ config.possible_num_properties = {0, 1, 2};
+ config.possible_num_tokens_ = {0, 1, 4};
+
+ // No deletion is performed to preserve a large number of documents.
+ config.monkey_api_schedules = {
+ {&IcingMonkeyTestRunner::DoPut, 500},
+ {&IcingMonkeyTestRunner::DoSearch, 200},
+ {&IcingMonkeyTestRunner::DoGet, 70},
+ {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
+ {&IcingMonkeyTestRunner::DoOptimize, 5},
+ {&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
+ IcingMonkeyTestRunner runner(std::move(config));
+ ASSERT_NO_FATAL_FAILURE(runner.Initialize());
+ // Pre-fill with 4 million documents
+ SetLoggingLevel(LogSeverity::WARNING);
+ for (int i = 0; i < 4000000; i++) {
+ ASSERT_NO_FATAL_FAILURE(runner.DoPut());
+ }
+ SetLoggingLevel(LogSeverity::INFO);
+ ASSERT_NO_FATAL_FAILURE(runner.Run(1000));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/monkey_test/in-memory-icing-search-engine.cc b/icing/monkey_test/in-memory-icing-search-engine.cc
new file mode 100644
index 0000000..7baa06e
--- /dev/null
+++ b/icing/monkey_test/in-memory-icing-search-engine.cc
@@ -0,0 +1,352 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/monkey_test/in-memory-icing-search-engine.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <random>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Check if s1 is a prefix of s2.
+bool IsPrefix(std::string_view s1, std::string_view s2) {
+ if (s1.length() > s2.length()) {
+ return false;
+ }
+ return s1 == s2.substr(0, s1.length());
+}
+
+} // namespace
+
+libtextclassifier3::StatusOr<const PropertyConfigProto *>
+InMemoryIcingSearchEngine::GetPropertyConfig(
+ const std::string &schema_type, const std::string &property_name) const {
+ auto schema_iter = property_config_map_.find(schema_type);
+ if (schema_iter == property_config_map_.end()) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Schema type: ", schema_type, " is not found."));
+ }
+ auto property_iter = schema_iter->second.find(property_name);
+ if (property_iter == schema_iter->second.end()) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Property: ", property_name, " is not found."));
+ }
+ return &property_iter->second;
+}
+
+libtextclassifier3::StatusOr<TermMatchType::Code>
+InMemoryIcingSearchEngine::GetTermMatchType(
+ const std::string &schema_type,
+ const MonkeyTokenizedSection &section) const {
+ bool in_indexable_properties_list = false;
+ bool all_indexable_from_top = true;
+
+ std::vector<std::string_view> properties_in_path =
+ absl_ports::StrSplit(section.path, ".");
+ if (properties_in_path.empty()) {
+ return absl_ports::InvalidArgumentError("Got empty path.");
+ }
+ std::string curr_schema_type = schema_type;
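+ // Walk the property path level by level. A string property is indexable
+ // only if every ancestor document property sets index_nested_properties, or
+ // if the path remaining below the first ancestor that clears it appears in
+ // that ancestor's indexable_nested_properties_list.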
+ for (int i = 0; i < properties_in_path.size(); ++i) {
+ ICING_ASSIGN_OR_RETURN(
+ const PropertyConfigProto *prop,
+ GetPropertyConfig(curr_schema_type,
+ std::string(properties_in_path[i])));
+ if (prop->data_type() == PropertyConfigProto::DataType::STRING) {
+ return prop->string_indexing_config().term_match_type();
+ }
+
+ if (prop->data_type() != PropertyConfigProto::DataType::DOCUMENT) {
+ return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+ }
+
+ bool old_all_indexable_from_top = all_indexable_from_top;
+ all_indexable_from_top &=
+ prop->document_indexing_config().index_nested_properties();
+ if (!all_indexable_from_top && !in_indexable_properties_list) {
+ // Only try to update in_indexable_properties_list if this is the first
+ // level with index_nested_properties=false.
+ if (old_all_indexable_from_top) {
+ auto &indexable_properties =
+ prop->document_indexing_config().indexable_nested_properties_list();
+ std::string relative_path =
+ absl_ports::StrCatPieces(std::vector<std::string_view>(
+ properties_in_path.begin() + i + 1, properties_in_path.end()));
+ in_indexable_properties_list =
+ std::find(indexable_properties.begin(), indexable_properties.end(),
+ relative_path) != indexable_properties.end();
+ }
+ // Check in_indexable_properties_list again.
+ if (!in_indexable_properties_list) {
+ return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+ }
+ }
+ curr_schema_type = prop->document_indexing_config().GetTypeName();
+ }
+ return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+}
+
+libtextclassifier3::StatusOr<bool>
+InMemoryIcingSearchEngine::DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const {
+ std::vector<std::string_view> strs = absl_ports::StrSplit(query, ":");
+ std::string_view query_term;
+ std::string_view section_restrict;
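+ // A query of the form "property:term" restricts matching to the given
+ // section; otherwise the whole query string is treated as the term.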
+ if (strs.size() > 1) {
+ section_restrict = strs[0];
+ query_term = strs[1];
+ } else {
+ query_term = query;
+ }
+ for (const MonkeyTokenizedSection &section : document.tokenized_sections) {
+ if (!section_restrict.empty() && section.path != section_restrict) {
+ continue;
+ }
+ ICING_ASSIGN_OR_RETURN(
+ TermMatchType::Code section_term_match_type,
+ GetTermMatchType(document.document.schema(), section));
+ if (section_term_match_type == TermMatchType::UNKNOWN) {
+ // Skip non-indexable property.
+ continue;
+ }
+ for (const std::string &token : section.token_sequence) {
+ if (section_term_match_type == TermMatchType::EXACT_ONLY ||
+ term_match_type == TermMatchType::EXACT_ONLY) {
+ if (token == query_term) {
+ return true;
+ }
+ } else if (IsPrefix(query_term, token)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void InMemoryIcingSearchEngine::SetSchema(SchemaProto &&schema) {
+ schema_ = std::make_unique<SchemaProto>(std::move(schema));
+ property_config_map_.clear();
+ for (const SchemaTypeConfigProto &type_config : schema_->types()) {
+ auto &curr_property_map = property_config_map_[type_config.schema_type()];
+ for (const PropertyConfigProto &property_config :
+ type_config.properties()) {
+ curr_property_map.insert(
+ {property_config.property_name(), property_config});
+ }
+ }
+}
+
+InMemoryIcingSearchEngine::PickDocumentResult
+InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
+ float p_other) const {
+ // Normalizing p_alive, p_all and p_other, so that they sum to 1.
+ if (p_alive == 0 && p_all == 0 && p_other == 0) {
+ p_alive = p_all = p_other = 1 / 3.;
+ } else {
+ float p_sum = p_alive + p_all + p_other;
+ p_alive = p_alive / p_sum;
+ p_all = p_all / p_sum;
+ p_other = p_other / p_sum;
+ }
+
+ std::uniform_real_distribution<> real_dist(0, 1);
+ float p = real_dist(*random_);
+ if (p <= p_other || documents_.empty()) {
+ // 20 is a fair number of non-existing namespaces and uris, enough for
+ // monkey testing.
+ std::uniform_int_distribution<> dist(0, 19);
+ std::string name_space = absl_ports::StrCat("non_existing_namespace",
+ std::to_string(dist(*random_)));
+ std::string uri =
+ absl_ports::StrCat("non_existing_uri", std::to_string(dist(*random_)));
+ return {name_space, uri};
+ }
+ p -= p_other;
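+ // With the remaining probability mass, pick from every document ever put
+ // (p_all, which may return an overwritten or deleted one) or only from the
+ // documents still alive (p_alive).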
+ DocumentId doc_id;
+ if (p <= p_all || existing_doc_ids_.empty()) {
+ std::uniform_int_distribution<DocumentId> dist(0, documents_.size() - 1);
+ doc_id = dist(*random_);
+ } else {
+ std::uniform_int_distribution<DocumentId> dist(
+ 0, existing_doc_ids_.size() - 1);
+ doc_id = existing_doc_ids_[dist(*random_)];
+ }
+ InMemoryIcingSearchEngine::PickDocumentResult result = {
+ documents_[doc_id].document.namespace_(),
+ documents_[doc_id].document.uri()};
+
+ // Even if the (name_space, uri) of the picked doc_id has not been deleted
+ // explicitly, doc_id may be outdated because of possible overwriting, so we
+ // need to find the latest document id and return the latest DocumentProto.
+ auto latest_doc_id = InternalGet(result.name_space, result.uri);
+ if (latest_doc_id.ok()) {
+ result.document = documents_[latest_doc_id.ValueOrDie()].document;
+ }
+ return result;
+}
+
+void InMemoryIcingSearchEngine::Put(const MonkeyTokenizedDocument &document) {
+ // Delete the old one if existing.
+ Delete(document.document.namespace_(), document.document.uri()).IgnoreError();
+ existing_doc_ids_.push_back(documents_.size());
+ namespace_uri_docid_map_[document.document.namespace_()]
+ [document.document.uri()] = documents_.size();
+ documents_.push_back(document);
+}
+
+std::unordered_set<std::string> InMemoryIcingSearchEngine::GetAllNamespaces()
+ const {
+ std::unordered_set<std::string> namespaces;
+ for (DocumentId doc_id : existing_doc_ids_) {
+ namespaces.insert(documents_[doc_id].document.namespace_());
+ }
+ return namespaces;
+}
+
+libtextclassifier3::Status InMemoryIcingSearchEngine::Delete(
+ const std::string &name_space, const std::string &uri) {
+ libtextclassifier3::StatusOr<DocumentId> doc_id_or =
+ InternalGet(name_space, uri);
+ if (doc_id_or.ok()) {
+ DocumentId doc_id = doc_id_or.ValueOrDie();
+ const DocumentProto &document = documents_[doc_id].document;
+ namespace_uri_docid_map_[document.namespace_()].erase(document.uri());
+ auto end_itr =
+ std::remove(existing_doc_ids_.begin(), existing_doc_ids_.end(), doc_id);
+ existing_doc_ids_.erase(end_itr, existing_doc_ids_.end());
+ }
+ return doc_id_or.status();
+}
+
+libtextclassifier3::StatusOr<uint32_t>
+InMemoryIcingSearchEngine::DeleteByNamespace(const std::string &name_space) {
+ std::vector<DocumentId> doc_ids_to_delete;
+ for (DocumentId doc_id : existing_doc_ids_) {
+ if (documents_[doc_id].document.namespace_() == name_space) {
+ doc_ids_to_delete.push_back(doc_id);
+ }
+ }
+ for (DocumentId doc_id : doc_ids_to_delete) {
+ const DocumentProto &document = documents_[doc_id].document;
+ if (!Delete(document.namespace_(), document.uri()).ok()) {
+ return absl_ports::InternalError(
+ "Should never happen. There are inconsistencies in the in-memory "
+ "Icing.");
+ }
+ }
+ return doc_ids_to_delete.size();
+}
+
+libtextclassifier3::StatusOr<uint32_t>
+InMemoryIcingSearchEngine::DeleteBySchemaType(const std::string &schema_type) {
+ std::vector<DocumentId> doc_ids_to_delete;
+ for (DocumentId doc_id : existing_doc_ids_) {
+ if (documents_[doc_id].document.schema() == schema_type) {
+ doc_ids_to_delete.push_back(doc_id);
+ }
+ }
+ for (DocumentId doc_id : doc_ids_to_delete) {
+ const DocumentProto &document = documents_[doc_id].document;
+ if (!Delete(document.namespace_(), document.uri()).ok()) {
+ return absl_ports::InternalError(
+ "Should never happen. There are inconsistencies in the in-memory "
+ "Icing.");
+ }
+ }
+ return doc_ids_to_delete.size();
+}
+
+libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
+ const SearchSpecProto &search_spec) {
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> doc_ids_to_delete,
+ InternalSearch(search_spec));
+ for (DocumentId doc_id : doc_ids_to_delete) {
+ const DocumentProto &document = documents_[doc_id].document;
+ if (!Delete(document.namespace_(), document.uri()).ok()) {
+ return absl_ports::InternalError(
+ "Should never happen. There are inconsistencies in the in-memory "
+ "Icing.");
+ }
+ }
+ return doc_ids_to_delete.size();
+}
+
+libtextclassifier3::StatusOr<std::vector<DocumentProto>>
+InMemoryIcingSearchEngine::Search(const SearchSpecProto &search_spec) const {
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> matched_doc_ids,
+ InternalSearch(search_spec));
+ std::vector<DocumentProto> result;
+ result.reserve(matched_doc_ids.size());
+ for (DocumentId doc_id : matched_doc_ids) {
+ result.push_back(documents_[doc_id].document);
+ }
+ return result;
+}
+
+libtextclassifier3::StatusOr<DocumentId> InMemoryIcingSearchEngine::InternalGet(
+ const std::string &name_space, const std::string &uri) const {
+ auto uris = namespace_uri_docid_map_.find(name_space);
+ if (uris != namespace_uri_docid_map_.end()) {
+ auto doc = uris->second.find(uri);
+ if (doc != uris->second.end()) {
+ return doc->second;
+ }
+ }
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ name_space, ", ", uri,
+ " is not found by InMemoryIcingSearchEngine::InternalGet."));
+}
+
+libtextclassifier3::StatusOr<std::vector<DocumentId>>
+InMemoryIcingSearchEngine::InternalSearch(
+ const SearchSpecProto &search_spec) const {
+ std::vector<DocumentId> matched_doc_ids;
+ for (DocumentId doc_id : existing_doc_ids_) {
+ ICING_ASSIGN_OR_RETURN(
+ bool match,
+ DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
+ search_spec.term_match_type()));
+ if (match) {
+ matched_doc_ids.push_back(doc_id);
+ }
+ }
+ return matched_doc_ids;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/monkey_test/in-memory-icing-search-engine.h b/icing/monkey_test/in-memory-icing-search-engine.h
new file mode 100644
index 0000000..98e7e4c
--- /dev/null
+++ b/icing/monkey_test/in-memory-icing-search-engine.h
@@ -0,0 +1,167 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
+#define ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
+
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+class InMemoryIcingSearchEngine {
+ public:
+ struct PickDocumentResult {
+ std::string name_space;
+ std::string uri;
+ // document is empty if and only if the (name_space, uri) pair is not alive
+ // in the in-memory icing.
+ std::optional<DocumentProto> document;
+ };
+
+ explicit InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random)
+ : random_(random) {}
+
+ uint32_t GetNumAliveDocuments() const { return existing_doc_ids_.size(); }
+
+ const SchemaProto *GetSchema() const { return schema_.get(); }
+
+ void SetSchema(SchemaProto &&schema);
+
+ // Randomly pick a document from the in-memory Icing for monkey testing.
+ //
+ // p_alive: chance of getting an alive document.
+ // p_all: chance of getting a document that has ever been "Put" before,
+ // including already "Delete"d documents.
+ // p_other: chance of getting a random namespace + uri that has never been
+ // "Put" before.
+ //
+ // p_alive, p_all, and p_other are required to be non-negative and sum to 1.
+ // Otherwise, they will be normalized to ensure this.
+ //
+ // Returns an instance of PickDocumentResult.
+ PickDocumentResult RandomPickDocument(float p_alive, float p_all,
+ float p_other) const;
+
+ // Puts the document into the in-memory Icing. If the (namespace, uri) pair
+ // already exists, the old document will be overwritten.
+ void Put(const MonkeyTokenizedDocument &document);
+
+ std::unordered_set<std::string> GetAllNamespaces() const;
+
+ // Deletes the Document specified by the given (namespace, uri) pair.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND if no document exists with namespace, uri
+ libtextclassifier3::Status Delete(const std::string &name_space,
+ const std::string &uri);
+
+ // Deletes all Documents belonging to the specified namespace.
+ //
+ // Returns:
+ // The number of deleted documents on success
+ // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing
+ libtextclassifier3::StatusOr<uint32_t> DeleteByNamespace(
+ const std::string &name_space);
+
+ // Deletes all Documents belonging to the specified type.
+ //
+ // Returns:
+ // The number of deleted documents on success
+ // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing
+ libtextclassifier3::StatusOr<uint32_t> DeleteBySchemaType(
+ const std::string &schema_type);
+
+ // Deletes all Documents that match the query specified in search_spec.
+ // Currently, only the "query" and "term_match_type" fields are recognized by
+ // the in-memory Icing, and only single term queries with possible section
+ // restrictions are supported.
+ //
+ // Returns:
+ // The number of deleted documents on success
+ // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing
+ libtextclassifier3::StatusOr<uint32_t> DeleteByQuery(
+ const SearchSpecProto &search_spec);
+
+ // Retrieves documents according to search_spec.
+ // Currently, only the "query" and "term_match_type" fields are recognized by
+ // the in-memory Icing, and only single term queries with possible section
+ // restrictions are supported.
+ libtextclassifier3::StatusOr<std::vector<DocumentProto>> Search(
+ const SearchSpecProto &search_spec) const;
+
+ private:
+ // Does not own.
+ MonkeyTestRandomEngine *random_;
+
+ std::vector<MonkeyTokenizedDocument> documents_;
+ std::vector<DocumentId> existing_doc_ids_;
+ // A map from namespaces to uris and then from uris to internal document ids,
+ // which is used for fast lookups.
+ std::unordered_map<std::string, std::unordered_map<std::string, DocumentId>>
+ namespace_uri_docid_map_;
+
+ std::unique_ptr<SchemaProto> schema_;
+ // A map that maps from (schema_type, property_name) to the corresponding
+ // PropertyConfigProto.
+ std::unordered_map<
+ std::string, std::unordered_map<std::string, const PropertyConfigProto &>>
+ property_config_map_;
+
+ // Finds and returns the internal document id for the document identified by
+ // the given key (namespace, uri)
+ //
+ // Returns:
+ // The document id found on success
+ // NOT_FOUND if the key doesn't exist or doc has been deleted
+ libtextclassifier3::StatusOr<DocumentId> InternalGet(
+ const std::string &name_space, const std::string &uri) const;
+
+ // A helper method for DeleteByQuery and Search to get matched internal doc
+ // ids.
+ libtextclassifier3::StatusOr<std::vector<DocumentId>> InternalSearch(
+ const SearchSpecProto &search_spec) const;
+
+ libtextclassifier3::StatusOr<const PropertyConfigProto *> GetPropertyConfig(
+ const std::string &schema_type, const std::string &property_name) const;
+
+ libtextclassifier3::StatusOr<TermMatchType::Code> GetTermMatchType(
+ const std::string &schema_type,
+ const MonkeyTokenizedSection &section) const;
+
+ libtextclassifier3::StatusOr<bool> DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
diff --git a/icing/monkey_test/monkey-test-common-words.h b/icing/monkey_test/monkey-test-common-words.h
new file mode 100644
index 0000000..f0ed08a
--- /dev/null
+++ b/icing/monkey_test/monkey-test-common-words.h
@@ -0,0 +1,284 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_MONKEY_TEST_COMMON_WORDS_H_
+#define ICING_MONKEY_TEST_MONKEY_TEST_COMMON_WORDS_H_
+
+#include <array>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+// A bag of words in English for creating random documents. Only words that are
+// at least 3 letters long are included (that's kPrefixLength) so that prefix
+// queries are easily formed from any word in a random document.
+// Data source:
+// https://chromium.googlesource.com/chromium/src/+/HEAD/components/url_formatter/spoof_checks/common_words/data/
+static constexpr std::array<std::string_view, 1000> kCommonWords = {
+ "the", "and", "for", "that",
+ "this", "with", "you", "not",
+ "are", "from", "your", "all",
+ "have", "new", "more", "was",
+ "will", "home", "can", "about",
+ "page", "has", "search", "free",
+ "but", "our", "one", "other",
+ "information", "time", "they", "site",
+ "may", "what", "which", "their",
+ "news", "out", "use", "any",
+ "there", "see", "only", "his",
+ "when", "contact", "here", "business",
+ "who", "web", "also", "now",
+ "help", "get", "view", "online",
+ "first", "been", "would", "how",
+ "were", "services", "some", "these",
+ "click", "its", "like", "service",
+ "than", "find", "price", "date",
+ "back", "top", "people", "had",
+ "list", "name", "just", "over",
+ "state", "year", "day", "into",
+ "email", "two", "health", "world",
+ "next", "used", "work", "last",
+ "most", "products", "music", "buy",
+ "data", "make", "them", "should",
+ "product", "system", "post", "her",
+ "city", "add", "policy", "number",
+ "such", "please", "available", "copyright",
+ "support", "message", "after", "best",
+ "software", "then", "jan", "good",
+ "video", "well", "where", "info",
+ "rights", "public", "books", "high",
+ "school", "through", "each", "links",
+ "she", "review", "years", "order",
+ "very", "privacy", "book", "items",
+ "company", "read", "group", "sex",
+ "need", "many", "user", "said",
+ "does", "set", "under", "general",
+ "research", "university", "january", "mail",
+ "full", "map", "reviews", "program",
+ "life", "know", "games", "way",
+ "days", "management", "part", "could",
+ "great", "united", "hotel", "real",
+ "item", "international", "center", "must",
+ "store", "travel", "comments", "made",
+ "development", "report", "off", "member",
+ "details", "line", "terms", "before",
+ "hotels", "did", "send", "right",
+ "type", "because", "local", "those",
+ "using", "results", "office", "education",
+ "national", "car", "design", "take",
+ "posted", "internet", "address", "community",
+ "within", "states", "area", "want",
+ "phone", "dvd", "shipping", "reserved",
+ "subject", "between", "forum", "family",
+ "long", "based", "code", "show",
+ "even", "black", "check", "special",
+ "prices", "website", "index", "being",
+ "women", "much", "sign", "file",
+ "link", "open", "today", "technology",
+ "south", "case", "project", "same",
+ "pages", "version", "section", "own",
+ "found", "sports", "house", "related",
+ "security", "both", "county", "american",
+ "photo", "game", "members", "power",
+ "while", "care", "network", "down",
+ "computer", "systems", "three", "total",
+ "place", "end", "following", "download",
+ "him", "without", "per", "access",
+ "think", "north", "resources", "current",
+ "posts", "big", "media", "law",
+ "control", "water", "history", "pictures",
+ "size", "art", "personal", "since",
+ "including", "guide", "shop", "directory",
+ "board", "location", "change", "white",
+ "text", "small", "rating", "rate",
+ "government", "children", "during", "usa",
+ "return", "students", "shopping", "account",
+ "times", "sites", "level", "digital",
+ "profile", "previous", "form", "events",
+ "love", "old", "john", "main",
+ "call", "hours", "image", "department",
+ "title", "description", "non", "insurance",
+ "another", "why", "shall", "property",
+ "class", "still", "money", "quality",
+ "every", "listing", "content", "country",
+ "private", "little", "visit", "save",
+ "tools", "low", "reply", "customer",
+ "december", "compare", "movies", "include",
+ "college", "value", "article", "york",
+ "man", "card", "jobs", "provide",
+ "food", "source", "author", "different",
+ "press", "learn", "sale", "around",
+ "print", "course", "job", "canada",
+ "process", "teen", "room", "stock",
+ "training", "too", "credit", "point",
+ "join", "science", "men", "categories",
+ "advanced", "west", "sales", "look",
+ "english", "left", "team", "estate",
+ "box", "conditions", "select", "windows",
+ "photos", "gay", "thread", "week",
+ "category", "note", "live", "large",
+ "gallery", "table", "register", "however",
+ "june", "october", "november", "market",
+ "library", "really", "action", "start",
+ "series", "model", "features", "air",
+ "industry", "plan", "human", "provided",
+ "yes", "required", "second", "hot",
+ "accessories", "cost", "movie", "forums",
+ "march", "september", "better", "say",
+ "questions", "july", "going", "medical",
+ "test", "friend", "come", "dec",
+ "server", "study", "application", "cart",
+ "staff", "articles", "san", "feedback",
+ "again", "play", "looking", "issues",
+ "april", "never", "users", "complete",
+ "street", "topic", "comment", "financial",
+ "things", "working", "against", "standard",
+ "tax", "person", "below", "mobile",
+ "less", "got", "blog", "party",
+ "payment", "equipment", "login", "student",
+ "let", "programs", "offers", "legal",
+ "above", "recent", "park", "stores",
+ "side", "act", "problem", "red",
+ "give", "memory", "performance", "social",
+ "august", "quote", "language", "story",
+ "sell", "options", "experience", "rates",
+ "create", "key", "body", "young",
+ "america", "important", "field", "few",
+ "east", "paper", "single", "age",
+ "activities", "club", "example", "girls",
+ "additional", "password", "latest", "something",
+ "road", "gift", "question", "changes",
+ "night", "hard", "texas", "oct",
+ "pay", "four", "poker", "status",
+ "browse", "issue", "range", "building",
+ "seller", "court", "february", "always",
+ "result", "audio", "light", "write",
+ "war", "nov", "offer", "blue",
+ "groups", "easy", "given", "files",
+ "event", "release", "analysis", "request",
+ "fax", "china", "making", "picture",
+ "needs", "possible", "might", "professional",
+ "yet", "month", "major", "star",
+ "areas", "future", "space", "committee",
+ "hand", "sun", "cards", "problems",
+ "london", "washington", "meeting", "rss",
+ "become", "interest", "child", "keep",
+ "enter", "california", "porn", "share",
+ "similar", "garden", "schools", "million",
+ "added", "reference", "companies", "listed",
+ "baby", "learning", "energy", "run",
+ "delivery", "net", "popular", "term",
+ "film", "stories", "put", "computers",
+ "journal", "reports", "try", "welcome",
+ "central", "images", "president", "notice",
+ "god", "original", "head", "radio",
+ "until", "cell", "color", "self",
+ "council", "away", "includes", "track",
+ "australia", "discussion", "archive", "once",
+ "others", "entertainment", "agreement", "format",
+ "least", "society", "months", "log",
+ "safety", "friends", "sure", "faq",
+ "trade", "edition", "cars", "messages",
+ "marketing", "tell", "further", "updated",
+ "association", "able", "having", "provides",
+ "david", "fun", "already", "green",
+ "studies", "close", "common", "drive",
+ "specific", "several", "gold", "feb",
+ "living", "sep", "collection", "called",
+ "short", "arts", "lot", "ask",
+ "display", "limited", "powered", "solutions",
+ "means", "director", "daily", "beach",
+ "past", "natural", "whether", "due",
+ "electronics", "five", "upon", "period",
+ "planning", "database", "says", "official",
+ "weather", "mar", "land", "average",
+ "done", "technical", "window", "france",
+ "pro", "region", "island", "record",
+ "direct", "conference", "environment", "records",
+ "district", "calendar", "costs", "style",
+ "url", "front", "statement", "update",
+ "parts", "aug", "ever", "downloads",
+ "early", "miles", "sound", "resource",
+ "present", "applications", "either", "ago",
+ "document", "word", "works", "material",
+ "bill", "apr", "written", "talk",
+ "federal", "hosting", "rules", "final",
+ "adult", "tickets", "thing", "centre",
+ "requirements", "via", "cheap", "nude",
+ "kids", "finance", "true", "minutes",
+ "else", "mark", "third", "rock",
+ "gifts", "europe", "reading", "topics",
+ "bad", "individual", "tips", "plus",
+ "auto", "cover", "usually", "edit",
+ "together", "videos", "percent", "fast",
+ "function", "fact", "unit", "getting",
+ "global", "tech", "meet", "far",
+ "economic", "player", "projects", "lyrics",
+ "often", "subscribe", "submit", "germany",
+ "amount", "watch", "included", "feel",
+ "though", "bank", "risk", "thanks",
+ "everything", "deals", "various", "words",
+ "linux", "jul", "production", "commercial",
+ "james", "weight", "town", "heart",
+ "advertising", "received", "choose", "treatment",
+ "newsletter", "archives", "points", "knowledge",
+ "magazine", "error", "camera", "jun",
+ "girl", "currently", "construction", "toys",
+ "registered", "clear", "golf", "receive",
+ "domain", "methods", "chapter", "makes",
+ "protection", "policies", "loan", "wide",
+ "beauty", "manager", "india", "position",
+ "taken", "sort", "listings", "models",
+ "michael", "known", "half", "cases",
+ "step", "engineering", "florida", "simple",
+ "quick", "none", "wireless", "license",
+ "paul", "friday", "lake", "whole",
+ "annual", "published", "later", "basic",
+ "shows", "corporate", "church", "method",
+ "purchase", "customers", "active", "response",
+ "practice", "hardware", "figure", "materials",
+ "fire", "holiday", "chat", "enough",
+ "designed", "along", "among", "death",
+ "writing", "speed", "html", "countries",
+ "loss", "face", "brand", "discount",
+ "higher", "effects", "created", "remember",
+ "standards", "oil", "bit", "yellow",
+ "political", "increase", "advertise", "kingdom",
+ "base", "near", "environmental", "thought",
+ "stuff", "french", "storage", "japan",
+ "doing", "loans", "shoes", "entry",
+ "stay", "nature", "orders", "availability",
+ "africa", "summary", "turn", "mean",
+ "growth", "notes", "agency", "king",
+ "monday", "european", "activity", "copy",
+ "although", "drug", "pics", "western",
+ "income", "force", "cash", "employment",
+ "overall", "bay", "river", "commission",
+ "package", "contents", "seen", "players",
+ "engine", "port", "album", "regional",
+ "stop", "supplies", "started", "administration",
+ "bar", "institute", "views", "plans",
+ "double", "dog", "build", "screen",
+ "exchange", "types", "soon", "sponsored",
+ "lines", "electronic", "continue", "across",
+ "benefits", "needed", "season", "apply",
+ "someone", "held", "anything", "printer",
+ "condition", "effective", "believe", "organization",
+ "effect", "asked", "eur", "mind"};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_MONKEY_TEST_COMMON_WORDS_H_
diff --git a/icing/monkey_test/monkey-test-generators.cc b/icing/monkey_test/monkey-test-generators.cc
new file mode 100644
index 0000000..0d5ad73
--- /dev/null
+++ b/icing/monkey_test/monkey-test-generators.cc
@@ -0,0 +1,346 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/monkey_test/monkey-test-generators.h"
+
+#include <array>
+#include <cstdint>
+#include <random>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/document-builder.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+constexpr std::array<PropertyConfigProto::Cardinality::Code, 3> kCardinalities =
+ {PropertyConfigProto::Cardinality::REPEATED,
+ PropertyConfigProto::Cardinality::OPTIONAL,
+ PropertyConfigProto::Cardinality::REQUIRED};
+
+constexpr std::array<TermMatchType::Code, 3> kTermMatchTypes = {
+ TermMatchType::UNKNOWN, TermMatchType::EXACT_ONLY, TermMatchType::PREFIX};
+
+PropertyConfigProto::Cardinality::Code GetRandomCardinality(
+ MonkeyTestRandomEngine* random) {
+ std::uniform_int_distribution<> dist(0, kCardinalities.size() - 1);
+ return kCardinalities[dist(*random)];
+}
+
+TermMatchType::Code GetRandomTermMatchType(MonkeyTestRandomEngine* random) {
+ std::uniform_int_distribution<> dist(0, kTermMatchTypes.size() - 1);
+ return kTermMatchTypes[dist(*random)];
+}
+
+// TODO: Update this function when supporting document_indexing_config.
+bool IsIndexableProperty(const PropertyConfigProto& property) {
+ return property.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN;
+}
+
+void SetStringIndexingConfig(PropertyConfigProto& property,
+ TermMatchType::Code term_match_type) {
+ if (term_match_type != TermMatchType::UNKNOWN) {
+ StringIndexingConfig* string_indexing_config =
+ property.mutable_string_indexing_config();
+ string_indexing_config->set_term_match_type(term_match_type);
+ // TODO: Try to add different TokenizerTypes. VERBATIM, RFC822, and URL are
+ // the remaining candidates to consider.
+ string_indexing_config->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ } else {
+ property.clear_string_indexing_config();
+ }
+}
+
+} // namespace
+
+SchemaProto MonkeySchemaGenerator::GenerateSchema() {
+ SchemaProto schema;
+ for (int i = 0; i < config_->num_types; ++i) {
+ *schema.add_types() = GenerateType();
+ }
+ return schema;
+}
+
+MonkeySchemaGenerator::UpdateSchemaResult MonkeySchemaGenerator::UpdateSchema(
+ const SchemaProto& schema) {
+  // `schema` is a const reference, so this initialization necessarily copies
+  // it; the copy is then mutated in place below.
+  UpdateSchemaResult result = {schema};
+ SchemaProto& new_schema = result.schema;
+
+ // Delete up to 2 existing types.
+ std::uniform_int_distribution<> num_types_to_delete_dist(0, 2);
+ for (int num_types_to_delete = num_types_to_delete_dist(*random_);
+       num_types_to_delete > 0; --num_types_to_delete) {
+ if (new_schema.types_size() > 0) {
+ std::uniform_int_distribution<> dist(0, new_schema.types_size() - 1);
+ int index_to_delete = dist(*random_);
+ result.schema_types_deleted.insert(
+ new_schema.types(index_to_delete).schema_type());
+ new_schema.mutable_types()->SwapElements(index_to_delete,
+ new_schema.types_size() - 1);
+ new_schema.mutable_types()->RemoveLast();
+ }
+ }
+
+  // Update about 1/3 of the existing types.
+ for (int i = 0; i < new_schema.types_size(); ++i) {
+ std::uniform_int_distribution<> dist(0, 2);
+ if (dist(*random_) == 0) {
+ UpdateType(*new_schema.mutable_types(i), result);
+ }
+ }
+
+ // Add up to 2 new types.
+ std::uniform_int_distribution<> num_types_to_add_dist(0, 2);
+ for (int num_types_to_add = num_types_to_add_dist(*random_);
+       num_types_to_add > 0; --num_types_to_add) {
+ *new_schema.add_types() = GenerateType();
+ }
+
+ return result;
+}
+
+PropertyConfigProto MonkeySchemaGenerator::GenerateProperty(
+ const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type) {
+ PropertyConfigProto prop;
+ prop.set_property_name(
+ "MonkeyTestProp" +
+ std::to_string(num_properties_generated_[type_config.schema_type()]++));
+ // TODO: Perhaps in future iterations we will want to generate more than just
+ // string properties.
+ prop.set_data_type(PropertyConfigProto::DataType::STRING);
+ prop.set_cardinality(cardinality);
+ SetStringIndexingConfig(prop, term_match_type);
+ return prop;
+}
+
+void MonkeySchemaGenerator::UpdateProperty(
+ const SchemaTypeConfigProto& type_config, PropertyConfigProto& property,
+ UpdateSchemaResult& result) {
+ PropertyConfigProto::Cardinality::Code new_cardinality =
+ GetRandomCardinality(random_);
+ if (new_cardinality != property.cardinality()) {
+    // Only do compatible cardinality updates for now; otherwise it would be
+    // hard to track which documents will be invalid after updating the schema.
+ //
+ // The following type of updates are not allowed:
+ // - optional -> required
+ // - repeated -> optional
+ // - repeated -> required
+ if (property.cardinality() == PropertyConfigProto::Cardinality::OPTIONAL &&
+ new_cardinality == PropertyConfigProto::Cardinality::REQUIRED) {
+ return;
+ }
+ if (property.cardinality() == PropertyConfigProto::Cardinality::REPEATED &&
+ (new_cardinality == PropertyConfigProto::Cardinality::OPTIONAL ||
+ new_cardinality == PropertyConfigProto::Cardinality::REQUIRED)) {
+ return;
+ }
+ property.set_cardinality(new_cardinality);
+ }
+
+ if (property.data_type() == PropertyConfigProto::DataType::STRING) {
+ TermMatchType::Code new_term_match_type = GetRandomTermMatchType(random_);
+ if (new_term_match_type !=
+ property.string_indexing_config().term_match_type()) {
+ SetStringIndexingConfig(property, new_term_match_type);
+ result.schema_types_index_incompatible.insert(type_config.schema_type());
+ }
+ }
+}
+
+SchemaTypeConfigProto MonkeySchemaGenerator::GenerateType() {
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("MonkeyTestType" +
+ std::to_string(num_types_generated_++));
+ std::uniform_int_distribution<> possible_num_properties_dist(
+ 0, config_->possible_num_properties.size() - 1);
+ int total_num_properties =
+ config_->possible_num_properties[possible_num_properties_dist(*random_)];
+
+ int num_indexed_properties = 0;
+ for (int i = 0; i < total_num_properties; ++i) {
+ TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
+ if (num_indexed_properties < kTotalNumSections) {
+ term_match_type = GetRandomTermMatchType(random_);
+ }
+ if (term_match_type != TermMatchType::UNKNOWN) {
+ num_indexed_properties += 1;
+ }
+ (*type_config.add_properties()) = GenerateProperty(
+ type_config, GetRandomCardinality(random_), term_match_type);
+ }
+ return type_config;
+}
+
+void MonkeySchemaGenerator::UpdateType(SchemaTypeConfigProto& type_config,
+ UpdateSchemaResult& result) {
+  // Delete up to 4 existing properties.
+  std::uniform_int_distribution<> num_properties_to_delete_dist(0, 4);
+  for (int num_properties_to_delete = num_properties_to_delete_dist(*random_);
+       num_properties_to_delete > 0; --num_properties_to_delete) {
+ if (type_config.properties_size() > 0) {
+ std::uniform_int_distribution<> dist(0,
+ type_config.properties_size() - 1);
+ int index_to_delete = dist(*random_);
+      // Only delete required properties for now; otherwise it would be hard
+      // to track which documents will be invalid after updating the schema.
+ if (type_config.properties(index_to_delete).cardinality() !=
+ PropertyConfigProto::Cardinality::REQUIRED) {
+ continue;
+ }
+ if (IsIndexableProperty(type_config.properties(index_to_delete))) {
+ result.schema_types_index_incompatible.insert(
+ type_config.schema_type());
+ }
+      // Removing a property causes the type to be considered incompatible.
+ result.schema_types_incompatible.insert(type_config.schema_type());
+
+ type_config.mutable_properties()->SwapElements(
+ index_to_delete, type_config.properties_size() - 1);
+ type_config.mutable_properties()->RemoveLast();
+ }
+ }
+
+  // Update about 1/3 of the existing properties.
+ for (int i = 0; i < type_config.properties_size(); ++i) {
+ std::uniform_int_distribution<> dist(0, 2);
+ if (dist(*random_) == 0) {
+ UpdateProperty(type_config, *type_config.mutable_properties(i), result);
+ }
+ }
+
+  // Add up to 4 new properties.
+  std::uniform_int_distribution<> num_properties_to_add_dist(0, 4);
+  for (int num_properties_to_add = num_properties_to_add_dist(*random_);
+       num_properties_to_add > 0; --num_properties_to_add) {
+ PropertyConfigProto::Cardinality::Code new_cardinality =
+ GetRandomCardinality(random_);
+    // Adding a required property will make all documents of this type invalid.
+ if (new_cardinality == PropertyConfigProto::Cardinality::REQUIRED) {
+ result.schema_types_incompatible.insert(type_config.schema_type());
+ }
+ PropertyConfigProto new_property = GenerateProperty(
+ type_config, new_cardinality, GetRandomTermMatchType(random_));
+ if (IsIndexableProperty(new_property)) {
+ result.schema_types_index_incompatible.insert(type_config.schema_type());
+ }
+ (*type_config.add_properties()) = std::move(new_property);
+ }
+
+ int num_indexed_properties = 0;
+ for (int i = 0; i < type_config.properties_size(); ++i) {
+ if (IsIndexableProperty(type_config.properties(i))) {
+ ++num_indexed_properties;
+ }
+ }
+
+ if (num_indexed_properties > kTotalNumSections) {
+ result.is_invalid_schema = true;
+ }
+}
+
+std::string MonkeyDocumentGenerator::GetNamespace() const {
+ uint32_t name_space;
+ // When num_namespaces is 0, all documents generated get different namespaces.
+ // Otherwise, namespaces will be randomly picked from a set with
+ // num_namespaces elements.
+ if (config_->num_namespaces == 0) {
+ name_space = num_docs_generated_;
+ } else {
+ std::uniform_int_distribution<> dist(0, config_->num_namespaces - 1);
+ name_space = dist(*random_);
+ }
+ return absl_ports::StrCat("namespace", std::to_string(name_space));
+}
+
+std::string MonkeyDocumentGenerator::GetUri() const {
+ uint32_t uri;
+ // When num_uris is 0, all documents generated get different URIs. Otherwise,
+ // URIs will be randomly picked from a set with num_uris elements.
+ if (config_->num_uris == 0) {
+ uri = num_docs_generated_;
+ } else {
+ std::uniform_int_distribution<> dist(0, config_->num_uris - 1);
+ uri = dist(*random_);
+ }
+ return absl_ports::StrCat("uri", std::to_string(uri));
+}
+
+int MonkeyDocumentGenerator::GetNumTokens() const {
+ std::uniform_int_distribution<> dist(
+ 0, config_->possible_num_tokens_.size() - 1);
+ int n = config_->possible_num_tokens_[dist(*random_)];
+  // Add some noise: scale by a random factor in [0.5, 1).
+ std::uniform_real_distribution<> real_dist(0.5, 1);
+ float p = real_dist(*random_);
+ return n * p;
+}
+
+std::vector<std::string> MonkeyDocumentGenerator::GetPropertyContent() const {
+ std::vector<std::string> content;
+ int num_tokens = GetNumTokens();
+ while (num_tokens) {
+ content.push_back(std::string(GetToken()));
+ --num_tokens;
+ }
+ return content;
+}
+
+MonkeyTokenizedDocument MonkeyDocumentGenerator::GenerateDocument() {
+ MonkeyTokenizedDocument document;
+ const SchemaTypeConfigProto& type_config = GetType();
+ const std::string& name_space = GetNamespace();
+ DocumentBuilder doc_builder =
+ DocumentBuilder()
+ .SetNamespace(name_space)
+ .SetSchema(type_config.schema_type())
+ .SetUri(GetUri())
+ .SetCreationTimestampMs(clock_.GetSystemTimeMilliseconds());
+ for (const PropertyConfigProto& prop : type_config.properties()) {
+ std::vector<std::string> prop_content = GetPropertyContent();
+ doc_builder.AddStringProperty(prop.property_name(),
+ absl_ports::StrJoin(prop_content, " "));
+    // Whether or not the property is currently indexable, we have to create a
+    // section for it, since a non-indexable property can become indexable
+    // after a schema type change. The in-memory Icing automatically skips
+    // sections that are non-indexable at the time of a search request.
+ MonkeyTokenizedSection section = {prop.property_name(),
+ std::move(prop_content)};
+ document.tokenized_sections.push_back(std::move(section));
+ }
+ document.document = doc_builder.Build();
+ ++num_docs_generated_;
+ return document;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/monkey_test/monkey-test-generators.h b/icing/monkey_test/monkey-test-generators.h
new file mode 100644
index 0000000..72a4723
--- /dev/null
+++ b/icing/monkey_test/monkey-test-generators.h
@@ -0,0 +1,127 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
+#define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
+
+#include <cstdint>
+#include <random>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/monkey_test/monkey-test-common-words.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+// A random schema generator used for monkey testing.
+class MonkeySchemaGenerator {
+ public:
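+  // The result of applying UpdateSchema: the updated schema itself; whether
+  // the update produced an invalid schema (e.g. more indexable properties
+  // than there are sections); and the sets of schema types that were deleted,
+  // made incompatible with pre-existing documents, or made index-incompatible.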
+ struct UpdateSchemaResult {
+ SchemaProto schema;
+ bool is_invalid_schema;
+ std::unordered_set<std::string> schema_types_deleted;
+ std::unordered_set<std::string> schema_types_incompatible;
+ std::unordered_set<std::string> schema_types_index_incompatible;
+ };
+
+ explicit MonkeySchemaGenerator(
+ MonkeyTestRandomEngine* random,
+ const IcingMonkeyTestRunnerConfiguration* config)
+ : random_(random), config_(config) {}
+
+ SchemaProto GenerateSchema();
+
+ UpdateSchemaResult UpdateSchema(const SchemaProto& schema);
+
+ private:
+ PropertyConfigProto GenerateProperty(
+ const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type);
+
+ void UpdateProperty(const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto& property,
+ UpdateSchemaResult& result);
+
+ SchemaTypeConfigProto GenerateType();
+
+ void UpdateType(SchemaTypeConfigProto& type_config,
+ UpdateSchemaResult& result);
+
+ int num_types_generated_ = 0;
+ // A map from type name to the number of properties generated in the
+ // corresponding types.
+ std::unordered_map<std::string, int> num_properties_generated_;
+
+ MonkeyTestRandomEngine* random_; // Does not own.
+ const IcingMonkeyTestRunnerConfiguration* config_; // Does not own.
+};
+
+// A random document generator used for monkey testing.
+// When num_uris is 0, all documents generated get different URIs. Otherwise,
+// URIs will be randomly picked from a set with num_uris elements.
+// Same for num_namespaces.
+class MonkeyDocumentGenerator {
+ public:
+ explicit MonkeyDocumentGenerator(
+ MonkeyTestRandomEngine* random, const SchemaProto* schema,
+ const IcingMonkeyTestRunnerConfiguration* config)
+ : random_(random), schema_(schema), config_(config) {}
+
+ const SchemaTypeConfigProto& GetType() const {
+ std::uniform_int_distribution<> dist(0, schema_->types_size() - 1);
+ return schema_->types(dist(*random_));
+ }
+
+ std::string_view GetToken() const {
+ // TODO: Instead of randomly picking tokens from the language set
+ // kCommonWords, we can make some words more common than others to simulate
+ // term frequencies in the real world. This can help us get extremely large
+ // posting lists.
+ std::uniform_int_distribution<> dist(0, kCommonWords.size() - 1);
+ return kCommonWords[dist(*random_)];
+ }
+
+ std::string GetNamespace() const;
+
+ std::string GetUri() const;
+
+ int GetNumTokens() const;
+
+ std::vector<std::string> GetPropertyContent() const;
+
+ MonkeyTokenizedDocument GenerateDocument();
+
+ private:
+ MonkeyTestRandomEngine* random_; // Does not own.
+ const SchemaProto* schema_; // Does not own.
+ const IcingMonkeyTestRunnerConfiguration* config_; // Does not own.
+
+ uint32_t num_docs_generated_ = 0;
+ Clock clock_;
+};
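+
+// A minimal sketch (illustrative, not part of the API) of how the two
+// generators above are typically wired together:
+//
+//   IcingMonkeyTestRunnerConfiguration config = ...;  // see monkey-test-util.h
+//   MonkeyTestRandomEngine random(config.seed);
+//   MonkeySchemaGenerator schema_generator(&random, &config);
+//   SchemaProto schema = schema_generator.GenerateSchema();
+//   MonkeyDocumentGenerator doc_generator(&random, &schema, &config);
+//   MonkeyTokenizedDocument doc = doc_generator.GenerateDocument();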
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
diff --git a/icing/monkey_test/monkey-test-util.h b/icing/monkey_test/monkey-test-util.h
new file mode 100644
index 0000000..d6053d8
--- /dev/null
+++ b/icing/monkey_test/monkey-test-util.h
@@ -0,0 +1,68 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
+#define ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
+
+#include <cstdint>
+#include <functional>
+#include <random>
+#include <utility>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+using MonkeyTestRandomEngine = std::mt19937;
+
+class IcingMonkeyTestRunner;
+
+struct IcingMonkeyTestRunnerConfiguration {
+ explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types,
+ int num_namespaces, int num_uris,
+ int index_merge_size)
+ : seed(seed),
+ num_types(num_types),
+ num_namespaces(num_namespaces),
+ num_uris(num_uris),
+ index_merge_size(index_merge_size) {}
+
+ uint32_t seed;
+ int num_types;
+ int num_namespaces;
+ int num_uris;
+ int index_merge_size;
+
+  // To keep tight control over the quality of the generated schema, the
+  // number of properties for each type is randomly picked from this list
+  // instead of from a range. For example, a vector of [1, 2, 3, 4] means each
+  // generated type has a 25% chance of getting 1, 2, 3, or 4 properties.
+ std::vector<int> possible_num_properties;
+
+ // The possible number of tokens that may appear in generated documents, with
+ // a noise factor from 0.5 to 1 applied.
+ std::vector<int> possible_num_tokens_;
+
+ // An array of pairs of monkey test APIs with frequencies.
+ // If f_sum is the sum of all the frequencies, an operation with frequency f
+ // means for every f_sum iterations, the operation is expected to run f times.
+ std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
+ monkey_api_schedules;
+};
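+
+// A sketch of filling in a configuration (the values below are illustrative
+// only; real tests choose their own):
+//
+//   IcingMonkeyTestRunnerConfiguration config(
+//       /*seed=*/42, /*num_types=*/30, /*num_namespaces=*/100,
+//       /*num_uris=*/1000, /*index_merge_size=*/1 << 20);
+//   config.possible_num_properties = {1, 2, 3, 4};
+//   config.possible_num_tokens_ = {10, 50, 100};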
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
diff --git a/icing/monkey_test/monkey-tokenized-document.h b/icing/monkey_test/monkey-tokenized-document.h
new file mode 100644
index 0000000..87b77bb
--- /dev/null
+++ b/icing/monkey_test/monkey-tokenized-document.h
@@ -0,0 +1,38 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_MONKEY_TOKENIZED_DOCUMENT_H_
+#define ICING_MONKEY_TEST_MONKEY_TOKENIZED_DOCUMENT_H_
+
+#include <string>
+#include <vector>
+
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+struct MonkeyTokenizedSection {
+ std::string path;
+ std::vector<std::string> token_sequence;
+};
+
+struct MonkeyTokenizedDocument {
+ DocumentProto document;
+ std::vector<MonkeyTokenizedSection> tokenized_sections;
+};
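+
+// For example (illustrative), a document whose "body" property holds
+// "hello world" would carry
+//   MonkeyTokenizedSection{/*path=*/"body",
+//                          /*token_sequence=*/{"hello", "world"}}.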
+
+} // namespace lib
+} // namespace icing
+#endif // ICING_MONKEY_TEST_MONKEY_TOKENIZED_DOCUMENT_H_
diff --git a/icing/performance-configuration.cc b/icing/performance-configuration.cc
index aeaa449..1518381 100644
--- a/icing/performance-configuration.cc
+++ b/icing/performance-configuration.cc
@@ -14,7 +14,7 @@
#include "icing/performance-configuration.h"
-#include "icing/result/result-state.h"
+#include "icing/scoring/scored-document-hit.h"
namespace icing {
namespace lib {
@@ -38,54 +38,33 @@ namespace {
// rendering 2 frames.
//
// With the information above, we then try to choose default values for
-// query_length and num_to_score so that the overall time can comfortably fit
-// in with our goal.
+// query_length so that the overall time can comfortably fit in with our goal
+// (note that num_to_score will be decided by the client, which is specified in
+// ResultSpecProto).
// 1. Set query_length to 23000 so that any query can be executed by
// QueryProcessor within 15 ms on a Pixel 3 XL according to results of
// //icing/query:query-processor_benchmark.
-// 2. Set num_to_score to 30000 so that results can be scored and ranked within
-// 3 ms on a Pixel 3 XL according to results of
-// //icing/scoring:score-and-rank_benchmark.
//
// In the worst-case scenario, we still have [33 ms - 15 ms - 3 ms] = 15 ms left
// for all the other things like proto parsing, document fetching, and even
// Android Binder calls if Icing search engine runs in a separate process.
constexpr int kMaxQueryLength = 23000;
-constexpr int kDefaultNumToScore = 30000;
// New Android devices nowadays all allow more than 16 MB memory per app. Using
-// that as a guideline, we set 16 MB as the safe memory threshold.
+// that as a guideline and being more conservative, we set 4 MB as the safe
+// memory threshold.
// TODO(b/150029642): Android apps / framework have better understanding of how
// much memory is allowed, so it would be better to let clients pass in this
// value.
-constexpr int kSafeMemoryUsage = 16 * 1024 * 1024; // 16MB
+constexpr int kSafeMemoryUsage = 4 * 1024 * 1024; // 4MB
-// This number is not determined by benchmarks. We just assume that returning
-// the best 1000 scored document hits of a query is enough. To find the best
-// 1000 scored document hits from a heap, we need roughly 0.7 ms on a Pixel 3 XL
-// according to //icing/scoring:ranker_benchmark.
-constexpr int kMaxNumHitsPerQuery = 1000;
+// The maximum number of hits that can fit below the kSafeMemoryUsage threshold.
+constexpr int kMaxNumTotalHits = kSafeMemoryUsage / sizeof(ScoredDocumentHit);
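+// As a worked example: if sizeof(ScoredDocumentHit) were 16 bytes (a
+// hypothetical figure), this would allow 4 MB / 16 B = 262,144 cached hits.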
-// A rough estimation of the size of ResultState if it stores the maximum number
-// of scored document hits.
-constexpr int kMaxMemoryPerResult =
- sizeof(ResultState) + kMaxNumHitsPerQuery * sizeof(ScoredDocumentHit);
-
-// To be safer, we assume that all the Results contain the maximum number of
-// hits and only use half of the memory allowed.
-constexpr int kDefaultNumResultsToCache =
- kSafeMemoryUsage / 2 / kMaxMemoryPerResult;
-
-static_assert(
- kDefaultNumResultsToCache > 500,
- "Default number of results to cache has changed, please update and make "
- "sure it still meets our requirements.");
} // namespace
PerformanceConfiguration::PerformanceConfiguration()
- : PerformanceConfiguration(kMaxQueryLength, kDefaultNumToScore,
- kMaxNumHitsPerQuery, kDefaultNumResultsToCache) {
-}
+ : PerformanceConfiguration(kMaxQueryLength, kMaxNumTotalHits) {}
} // namespace lib
} // namespace icing
diff --git a/icing/performance-configuration.h b/icing/performance-configuration.h
index fa4050b..3ec67f3 100644
--- a/icing/performance-configuration.h
+++ b/icing/performance-configuration.h
@@ -23,29 +23,20 @@ struct PerformanceConfiguration {
// Loads default configuration.
PerformanceConfiguration();
- PerformanceConfiguration(int max_query_length_in, int num_to_score_in,
- int max_num_hits_per_query_in,
- int max_num_cache_results_in)
+  PerformanceConfiguration(int max_query_length_in, int max_num_total_hits_in)
: max_query_length(max_query_length_in),
- num_to_score(num_to_score_in),
- max_num_hits_per_query(max_num_hits_per_query_in),
- max_num_cache_results(max_num_cache_results_in) {}
+        max_num_total_hits(max_num_total_hits_in) {}
// Search performance
// Maximum length of query to execute in QueryProcessor.
int max_query_length;
- // Number of results to score in ScoringProcessor for every query.
- int num_to_score;
-
// Memory
- // Maximum number of ScoredDocumentHits to return per query.
- int max_num_hits_per_query;
-
- // Maximum number of ResultStates to store in ResultStateManager.
- int max_num_cache_results;
+ // Maximum number of ScoredDocumentHits to cache in the ResultStateManager at
+ // one time.
+ int max_num_total_hits;
};
// TODO(b/149040810): Consider creating a class to manage performance
diff --git a/icing/portable/endian.h b/icing/portable/endian.h
new file mode 100644
index 0000000..ecebb15
--- /dev/null
+++ b/icing/portable/endian.h
@@ -0,0 +1,208 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utility functions that depend on bytesex. We define versions of htonll and
+// ntohll (HostToNetworkLL and NetworkToHostLL in our naming), as well as
+// "Google" versions of all the standards: ghtonl, ghtons, and so on
+// (GHostToNetworkL, GHostToNetworkS, etc. in our naming). These functions
+// behave exactly like their standard variants, but don't require including
+// the dangerous netinet/in.h.
+
+#ifndef ICING_PORTABLE_ENDIAN_H_
+#define ICING_PORTABLE_ENDIAN_H_
+
+#include <cstdint>
+
+// IS_LITTLE_ENDIAN, IS_BIG_ENDIAN
+#if defined OS_LINUX || defined OS_ANDROID || defined(__ANDROID__)
+// _BIG_ENDIAN
+#include <endian.h>
+
+#elif defined(__APPLE__)
+
+// BIG_ENDIAN
+#include <machine/endian.h> // NOLINT(build/include)
+
+/* Let's try and follow the Linux convention */
+#define __BYTE_ORDER BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+
+#endif // operating system
+
+// defines __BYTE_ORDER for MSVC
+#ifdef COMPILER_MSVC
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#else // COMPILER_MSVC
+
+// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
+// using the above endian definitions from endian.h if
+// endian.h was included
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif // __BYTE_ORDER == __LITTLE_ENDIAN
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif // __BYTE_ORDER == __BIG_ENDIAN
+
+#else // __BYTE_ORDER
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif // __LITTLE_ENDIAN__ or __BIG_ENDIAN__
+
+#endif // __BYTE_ORDER
+#endif // COMPILER_MSVC
+
+// byte swap functions (bswap_16, bswap_32, bswap_64).
+// byte swap functions reverse the order of bytes, e.g.
+// byteswap of 102030405060708 = 807060504030201
+// byteswap of 1020304 = 4030201
+
+// The following guarantees declaration of the byte swap functions
+#ifdef COMPILER_MSVC
+#include <cstdlib> // NOLINT(build/include)
+
+#define bswap_16(x) _byteswap_ushort(x)
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+
+#define bswap_16(x) OSSwapInt16(x)
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#elif defined(__GLIBC__) || defined(__BIONIC__) || defined(__ASYLO__)
+#include <byteswap.h> // IWYU pragma: export
+
+#else // built-in byteswap functions
+
+static inline uint16_t bswap_16(uint16_t x) {
+#ifdef __cplusplus
+  return static_cast<uint16_t>(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8));
+#else  // __cplusplus
+  return (uint16_t)(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8));  // NOLINT
+#endif  // __cplusplus
+}
+#define bswap_16(x) bswap_16(x)
+static inline uint32_t bswap_32(uint32_t x) {
+  return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) |
+          ((x & 0xFF000000) >> 24));
+}
+#define bswap_32(x) bswap_32(x)
+static inline uint64_t bswap_64(uint64_t x) {
+ return (((x & (uint64_t)0xFF) << 56) | ((x & (uint64_t)0xFF00) << 40) |
+ ((x & (uint64_t)0xFF0000) << 24) | ((x & (uint64_t)0xFF000000) << 8) |
+ ((x & (uint64_t)0xFF00000000) >> 8) |
+ ((x & (uint64_t)0xFF0000000000) >> 24) |
+ ((x & (uint64_t)0xFF000000000000) >> 40) |
+ ((x & (uint64_t)0xFF00000000000000) >> 56));
+}
+#define bswap_64(x) bswap_64(x)
+
+#endif // end byteswap functions
+
+// Use compiler byte-swapping intrinsics if they are available. 32-bit
+// and 64-bit versions are available in Clang and GCC as of GCC 4.3.0.
+// The 16-bit version is available in Clang and GCC only as of GCC 4.8.0.
+// For simplicity, we enable them all only for GCC 4.8.0 or later.
+#if defined(__clang__) || \
+ (defined(__GNUC__) && \
+ ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5))
+
+inline uint64_t gbswap_64(uint64_t host_int) {
+ return __builtin_bswap64(host_int);
+}
+inline uint32_t gbswap_32(uint32_t host_int) {
+ return __builtin_bswap32(host_int);
+}
+inline uint16_t gbswap_16(uint16_t host_int) {
+ return __builtin_bswap16(host_int);
+}
+
+#else // intrinsics available
+
+inline uint64_t gbswap_64(uint64_t host_int) {
+#if defined(__GNUC__) && defined(__x86_64__) && \
+    !(defined(__APPLE__) && defined(__MACH__))
+  // Adapted from /usr/include/byteswap.h. Not available on Mac.
+  if (__builtin_constant_p(host_int)) {
+    return __bswap_constant_64(host_int);
+  } else {
+    uint64_t result;
+    __asm__("bswap %0" : "=r"(result) : "0"(host_int));
+    return result;
+  }
+#elif defined(bswap_64)
+  return bswap_64(host_int);
+#else  // bswap_64
+  return static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int >> 32))) |
+         (static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int))) << 32);
+#endif  // bswap_64
+}
+inline uint32_t gbswap_32(uint32_t host_int) { return bswap_32(host_int); }
+inline uint16_t gbswap_16(uint16_t host_int) { return bswap_16(host_int); }
+
+#endif // intrinsics available
+
+#ifdef IS_LITTLE_ENDIAN
+
+// Definitions for ntohl etc. that don't require us to include
+// netinet/in.h. We wrap gbswap_32 and gbswap_16 in functions rather
+// than just #defining them because in debug mode, gcc doesn't
+// correctly handle the (rather involved) definitions of bswap_32.
+// gcc guarantees that inline functions are as fast as macros, so
+// this isn't a performance hit.
+inline uint16_t GHostToNetworkS(uint16_t x) { return gbswap_16(x); }
+inline uint32_t GHostToNetworkL(uint32_t x) { return gbswap_32(x); }
+inline uint64_t GHostToNetworkLL(uint64_t x) { return gbswap_64(x); }
+
+#elif defined IS_BIG_ENDIAN
+
+// These definitions are simpler on big-endian machines.
+// These are functions instead of macros to avoid self-assignment warnings
+// on calls such as "i = GHostToNetworkL(i);". This also provides type
+// checking.
+inline uint16_t GHostToNetworkS(uint16_t x) { return x; }
+inline uint32_t GHostToNetworkL(uint32_t x) { return x; }
+inline uint64_t GHostToNetworkLL(uint64_t x) { return x; }
+
+#else // bytesex
+#error \
+ "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT
+#endif // bytesex
+
+#ifndef HostToNetworkLL
+// With the rise of 64-bit, some systems are beginning to define this.
+#define HostToNetworkLL(x) GHostToNetworkLL(x)
+#endif // HostToNetworkLL
+
+// ntoh* and hton* are the same thing for any size and bytesex,
+// since the function is an involution, i.e., its own inverse.
+inline uint16_t GNetworkToHostS(uint16_t x) { return GHostToNetworkS(x); }
+inline uint32_t GNetworkToHostL(uint32_t x) { return GHostToNetworkL(x); }
+inline uint64_t GNetworkToHostLL(uint64_t x) { return GHostToNetworkLL(x); }
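+
+// A round-trip sketch (illustrative): since each function is its own inverse,
+// converting to network order and back recovers the original value on either
+// endianness.
+//
+//   uint64_t wire = GHostToNetworkLL(host_value);   // serialize big-endian
+//   uint64_t host_value2 = GNetworkToHostLL(wire);  // == host_value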
+
+#ifndef NetworkToHostLL
+#define NetworkToHostLL(x) GHostToNetworkLL(x)
+#endif // NetworkToHostLL
+
+#endif // ICING_PORTABLE_ENDIAN_H_
diff --git a/icing/portable/equals-proto.h b/icing/portable/equals-proto.h
index 6a600be..8bb835e 100644
--- a/icing/portable/equals-proto.h
+++ b/icing/portable/equals-proto.h
@@ -20,8 +20,8 @@
#ifndef ICING_PORTABLE_EQUALS_PROTO_H_
#define ICING_PORTABLE_EQUALS_PROTO_H_
+#include "gmock/gmock.h" // IWYU pragma: export
#include <google/protobuf/message_lite.h> // IWYU pragma: export
-#include "gmock/gmock.h" // IWYU pragma: export
#if defined(__ANDROID__) || defined(__APPLE__)
namespace icing {
diff --git a/icing/portable/gzip_stream.cc b/icing/portable/gzip_stream.cc
new file mode 100644
index 0000000..f00a993
--- /dev/null
+++ b/icing/portable/gzip_stream.cc
@@ -0,0 +1,313 @@
+// Copyright (C) 2009 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains the implementation of classes GzipInputStream and
+// GzipOutputStream. It is forked from protobuf because these classes are only
+// provided in libprotobuf-full but we would like to link libicing against the
+// smaller libprotobuf-lite instead.
+
+#include "icing/portable/gzip_stream.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+namespace protobuf_ports {
+
+static const int kDefaultBufferSize = 65536;
+
+GzipInputStream::GzipInputStream(ZeroCopyInputStream* sub_stream, Format format,
+ int buffer_size)
+ : format_(format), sub_stream_(sub_stream), zerror_(Z_OK), byte_count_(0) {
+ zcontext_.state = Z_NULL;
+ zcontext_.zalloc = Z_NULL;
+ zcontext_.zfree = Z_NULL;
+ zcontext_.opaque = Z_NULL;
+ zcontext_.total_out = 0;
+ zcontext_.next_in = NULL;
+ zcontext_.avail_in = 0;
+ zcontext_.total_in = 0;
+ zcontext_.msg = NULL;
+ if (buffer_size == -1) {
+ output_buffer_length_ = kDefaultBufferSize;
+ } else {
+ output_buffer_length_ = buffer_size;
+ }
+ output_buffer_ = operator new(output_buffer_length_);
+ zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+ zcontext_.avail_out = output_buffer_length_;
+ output_position_ = output_buffer_;
+}
+GzipInputStream::~GzipInputStream() {
+ operator delete(output_buffer_);
+ zerror_ = inflateEnd(&zcontext_);
+}
+
+static inline int internalInflateInit2(z_stream* zcontext,
+ GzipInputStream::Format format) {
+ int windowBitsFormat = 0;
+ switch (format) {
+ case GzipInputStream::GZIP:
+ windowBitsFormat = 16;
+ break;
+ case GzipInputStream::AUTO:
+ windowBitsFormat = 32;
+ break;
+ case GzipInputStream::ZLIB:
+ windowBitsFormat = 0;
+ break;
+ }
+ return inflateInit2(zcontext, /* windowBits */ 15 | windowBitsFormat);
+}
+
+int GzipInputStream::Inflate(int flush) {
+ if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) {
+    // Previous inflate filled the output buffer; don't change input params yet.
+ } else if (zcontext_.avail_in == 0) {
+ const void* in;
+ int in_size;
+ bool first = zcontext_.next_in == NULL;
+ bool ok = sub_stream_->Next(&in, &in_size);
+ if (!ok) {
+ zcontext_.next_out = NULL;
+ zcontext_.avail_out = 0;
+ return Z_STREAM_END;
+ }
+ zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in));
+ zcontext_.avail_in = in_size;
+ if (first) {
+ int error = internalInflateInit2(&zcontext_, format_);
+ if (error != Z_OK) {
+ return error;
+ }
+ }
+ }
+ zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+ zcontext_.avail_out = output_buffer_length_;
+ output_position_ = output_buffer_;
+ int error = inflate(&zcontext_, flush);
+ return error;
+}
+
+void GzipInputStream::DoNextOutput(const void** data, int* size) {
+ *data = output_position_;
+ *size = ((uintptr_t)zcontext_.next_out) - ((uintptr_t)output_position_);
+ output_position_ = zcontext_.next_out;
+}
+
+// implements ZeroCopyInputStream ----------------------------------
+bool GzipInputStream::Next(const void** data, int* size) {
+ bool ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END) ||
+ (zerror_ == Z_BUF_ERROR);
+ if ((!ok) || (zcontext_.next_out == NULL)) {
+ return false;
+ }
+ if (zcontext_.next_out != output_position_) {
+ DoNextOutput(data, size);
+ return true;
+ }
+ if (zerror_ == Z_STREAM_END) {
+ if (zcontext_.next_out != NULL) {
+ // sub_stream_ may have concatenated streams to follow
+ zerror_ = inflateEnd(&zcontext_);
+ byte_count_ += zcontext_.total_out;
+ if (zerror_ != Z_OK) {
+ return false;
+ }
+ zerror_ = internalInflateInit2(&zcontext_, format_);
+ if (zerror_ != Z_OK) {
+ return false;
+ }
+ } else {
+ *data = NULL;
+ *size = 0;
+ return false;
+ }
+ }
+ zerror_ = Inflate(Z_NO_FLUSH);
+ if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) {
+ // The underlying stream's Next returned false inside Inflate.
+ return false;
+ }
+ ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END) ||
+ (zerror_ == Z_BUF_ERROR);
+ if (!ok) {
+ return false;
+ }
+ DoNextOutput(data, size);
+ return true;
+}
+void GzipInputStream::BackUp(int count) {
+ output_position_ = reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(output_position_) - count);
+}
+bool GzipInputStream::Skip(int count) {
+ const void* data;
+ int size = 0;
+ bool ok = Next(&data, &size);
+ while (ok && (size < count)) {
+ count -= size;
+ ok = Next(&data, &size);
+ }
+ if (size > count) {
+ BackUp(size - count);
+ }
+ return ok;
+}
+int64_t GzipInputStream::ByteCount() const {
+ int64_t ret = byte_count_ + zcontext_.total_out;
+ if (zcontext_.next_out != NULL && output_position_ != NULL) {
+ ret += reinterpret_cast<uintptr_t>(zcontext_.next_out) -
+ reinterpret_cast<uintptr_t>(output_position_);
+ }
+ return ret;
+}
+
+// =========================================================================
+
+GzipOutputStream::Options::Options()
+ : format(GZIP),
+ buffer_size(kDefaultBufferSize),
+ compression_level(Z_DEFAULT_COMPRESSION),
+ compression_strategy(Z_DEFAULT_STRATEGY) {}
+
+GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream) {
+ Init(sub_stream, Options());
+}
+
+GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream,
+ const Options& options) {
+ Init(sub_stream, options);
+}
+
+void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream,
+ const Options& options) {
+ sub_stream_ = sub_stream;
+ sub_data_ = NULL;
+ sub_data_size_ = 0;
+
+ input_buffer_length_ = options.buffer_size;
+ input_buffer_ = operator new(input_buffer_length_);
+
+ zcontext_.zalloc = Z_NULL;
+ zcontext_.zfree = Z_NULL;
+ zcontext_.opaque = Z_NULL;
+ zcontext_.next_out = NULL;
+ zcontext_.avail_out = 0;
+ zcontext_.total_out = 0;
+ zcontext_.next_in = NULL;
+ zcontext_.avail_in = 0;
+ zcontext_.total_in = 0;
+ zcontext_.msg = NULL;
+ // default to GZIP format
+ int windowBitsFormat = 16;
+ if (options.format == ZLIB) {
+ windowBitsFormat = 0;
+ }
+ zerror_ =
+ deflateInit2(&zcontext_, options.compression_level, Z_DEFLATED,
+ /* windowBits */ 15 | windowBitsFormat,
+ /* memLevel (default) */ 8, options.compression_strategy);
+}
+
+GzipOutputStream::~GzipOutputStream() {
+ Close();
+ operator delete(input_buffer_);
+}
+
+// private
+int GzipOutputStream::Deflate(int flush) {
+ int error = Z_OK;
+ do {
+ if ((sub_data_ == NULL) || (zcontext_.avail_out == 0)) {
+ bool ok = sub_stream_->Next(&sub_data_, &sub_data_size_);
+ if (!ok) {
+ sub_data_ = NULL;
+ sub_data_size_ = 0;
+ return Z_BUF_ERROR;
+ }
+ if (sub_data_size_ <= 0) {
+ ICING_LOG(FATAL) << "Failed to advance underlying stream";
+ }
+ zcontext_.next_out = static_cast<Bytef*>(sub_data_);
+ zcontext_.avail_out = sub_data_size_;
+ }
+ error = deflate(&zcontext_, flush);
+ } while (error == Z_OK && zcontext_.avail_out == 0);
+ if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) {
+ // Notify lower layer of data.
+ sub_stream_->BackUp(zcontext_.avail_out);
+ // We don't own the buffer anymore.
+ sub_data_ = NULL;
+ sub_data_size_ = 0;
+ }
+ return error;
+}
+
+// implements ZeroCopyOutputStream ---------------------------------
+bool GzipOutputStream::Next(void** data, int* size) {
+ if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {
+ return false;
+ }
+ if (zcontext_.avail_in != 0) {
+ zerror_ = Deflate(Z_NO_FLUSH);
+ if (zerror_ != Z_OK) {
+ return false;
+ }
+ }
+ if (zcontext_.avail_in == 0) {
+    // All input was consumed; reset the buffer.
+ zcontext_.next_in = static_cast<Bytef*>(input_buffer_);
+ zcontext_.avail_in = input_buffer_length_;
+ *data = input_buffer_;
+ *size = input_buffer_length_;
+ } else {
+ // The loop in Deflate should consume all avail_in
+ ICING_LOG(ERROR) << "Deflate left bytes unconsumed";
+ }
+ return true;
+}
+void GzipOutputStream::BackUp(int count) {
+ if (zcontext_.avail_in < static_cast<uInt>(count)) {
+ ICING_LOG(FATAL) << "Not enough data to back up " << count << " bytes";
+ }
+ zcontext_.avail_in -= count;
+}
+int64_t GzipOutputStream::ByteCount() const {
+ return zcontext_.total_in + zcontext_.avail_in;
+}
+
+bool GzipOutputStream::Flush() {
+ zerror_ = Deflate(Z_FULL_FLUSH);
+ // Return true if the flush succeeded or if it was a no-op.
+ return (zerror_ == Z_OK) ||
+ (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 &&
+ zcontext_.avail_out != 0);
+}
+
+bool GzipOutputStream::Close() {
+ if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {
+ return false;
+ }
+ do {
+ zerror_ = Deflate(Z_FINISH);
+ } while (zerror_ == Z_OK);
+ zerror_ = deflateEnd(&zcontext_);
+ bool ok = zerror_ == Z_OK;
+ zerror_ = Z_STREAM_END;
+ return ok;
+}
+
+} // namespace protobuf_ports
+} // namespace lib
+} // namespace icing
diff --git a/icing/portable/gzip_stream.h b/icing/portable/gzip_stream.h
new file mode 100644
index 0000000..8008a55
--- /dev/null
+++ b/icing/portable/gzip_stream.h
@@ -0,0 +1,177 @@
+// Copyright (C) 2009 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains the definition for classes GzipInputStream and
+// GzipOutputStream. It is forked from protobuf because these classes are only
+// provided in libprotobuf-full, but we would like to link libicing against the
+// smaller libprotobuf-lite instead.
+//
+// GzipInputStream decompresses data from an underlying
+// ZeroCopyInputStream and provides the decompressed data as a
+// ZeroCopyInputStream.
+//
+// GzipOutputStream is a ZeroCopyOutputStream that compresses data to
+// an underlying ZeroCopyOutputStream.
+
+#ifndef GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
+#define GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
+
+#include "icing/portable/zlib.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+namespace icing {
+namespace lib {
+namespace protobuf_ports {
+
+// A ZeroCopyInputStream that reads compressed data through zlib
+class GzipInputStream : public google::protobuf::io::ZeroCopyInputStream {
+ public:
+ // Format key for constructor
+ enum Format {
+ // zlib will autodetect gzip header or deflate stream
+ AUTO = 0,
+
+ // GZIP streams have some extra header data for file attributes.
+ GZIP = 1,
+
+ // Simpler zlib stream format.
+ ZLIB = 2,
+ };
+
+ // buffer_size may be -1 for the default of 64kB; format defaults to AUTO.
+ explicit GzipInputStream(google::protobuf::io::ZeroCopyInputStream* sub_stream,
+ Format format = AUTO, int buffer_size = -1);
+ virtual ~GzipInputStream();
+
+ // Return last error message or NULL if no error.
+ inline const char* ZlibErrorMessage() const { return zcontext_.msg; }
+ inline int ZlibErrorCode() const { return zerror_; }
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size) override;
+ void BackUp(int count) override;
+ bool Skip(int count) override;
+ int64_t ByteCount() const override;
+
+ private:
+ Format format_;
+
+ google::protobuf::io::ZeroCopyInputStream* sub_stream_;
+
+ z_stream zcontext_;
+ int zerror_;
+
+ void* output_buffer_;
+ void* output_position_;
+ size_t output_buffer_length_;
+ int64_t byte_count_;
+
+ int Inflate(int flush);
+ void DoNextOutput(const void** data, int* size);
+};
+
+class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream {
+ public:
+ // Format key for constructor
+ enum Format {
+ // GZIP streams have some extra header data for file attributes.
+ GZIP = 1,
+
+ // Simpler zlib stream format.
+ ZLIB = 2,
+ };
+
+ struct Options {
+ // Defaults to GZIP.
+ Format format;
+
+ // What size buffer to use internally. Defaults to 64kB.
+ int buffer_size;
+
+ // A number between 0 and 9, where 0 is no compression and 9 is best
+ // compression. Defaults to Z_DEFAULT_COMPRESSION (see zlib.h).
+ int compression_level;
+
+ // Defaults to Z_DEFAULT_STRATEGY. Can also be set to Z_FILTERED,
+ // Z_HUFFMAN_ONLY, or Z_RLE. See the documentation for deflateInit2 in
+ // zlib.h for definitions of these constants.
+ int compression_strategy;
+
+ Options(); // Initializes with default values.
+ };
+
+ // Create a GzipOutputStream with default options.
+ explicit GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream);
+
+ // Create a GzipOutputStream with the given options.
+ GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream,
+ const Options& options);
+
+ virtual ~GzipOutputStream();
+
+ // Return last error message or NULL if no error.
+ inline const char* ZlibErrorMessage() const { return zcontext_.msg; }
+ inline int ZlibErrorCode() const { return zerror_; }
+
+ // Flushes data written so far to zipped data in the underlying stream.
+ // It is the caller's responsibility to flush the underlying stream if
+ // necessary.
+ // Compression may be less efficient when stopping and starting around flushes.
+ // Returns true if no error.
+ //
+ // Please ensure that block size is > 6. Here is an excerpt from the zlib
+ // doc that explains why:
+ //
+ // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out
+ // is greater than six to avoid repeated flush markers due to
+ // avail_out == 0 on return.
+ bool Flush();
+
+ // Writes out all data and closes the gzip stream.
+ // It is the caller's responsibility to close the underlying stream if
+ // necessary.
+ // Returns true if no error.
+ bool Close();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size) override;
+ void BackUp(int count) override;
+ int64_t ByteCount() const override;
+
+ private:
+ google::protobuf::io::ZeroCopyOutputStream* sub_stream_;
+ // Result from calling Next() on sub_stream_
+ void* sub_data_;
+ int sub_data_size_;
+
+ z_stream zcontext_;
+ int zerror_;
+ void* input_buffer_;
+ size_t input_buffer_length_;
+
+ // Shared constructor code.
+ void Init(google::protobuf::io::ZeroCopyOutputStream* sub_stream,
+ const Options& options);
+
+ // Do some compression.
+ // Takes zlib flush mode.
+ // Returns zlib error code.
+ int Deflate(int flush);
+};
+
+} // namespace protobuf_ports
+} // namespace lib
+} // namespace icing
+
+#endif // GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
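For the read side, a matching hypothetical sketch: decompressing a buffer produced by GzipOutputStream back into a string, relying on the AUTO format default to detect the framing. ArrayInputStream comes from zero_copy_stream_impl_lite.h; everything else is assumed from this header only:

#include <string>

#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/portable/gzip_stream.h"

// Decompresses a gzip- or zlib-framed buffer back into a string.
std::string GzipDecompress(const std::string& compressed) {
  google::protobuf::io::ArrayInputStream raw_input(
      compressed.data(), static_cast<int>(compressed.size()));
  icing::lib::protobuf_ports::GzipInputStream gzip_input(&raw_input);
  std::string output;
  const void* buffer;
  int size;
  // Next() returns decompressed chunks until the stream is exhausted.
  while (gzip_input.Next(&buffer, &size)) {
    output.append(static_cast<const char*>(buffer), size);
  }
  return output;
}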
diff --git a/icing/portable/platform.h b/icing/portable/platform.h
new file mode 100644
index 0000000..6d8c668
--- /dev/null
+++ b/icing/portable/platform.h
@@ -0,0 +1,106 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_PORTABLE_PLATFORM_H_
+#define ICING_PORTABLE_PLATFORM_H_
+
+#include "unicode/uconfig.h" // IWYU pragma: keep
+// clang-format: do not reorder the above include.
+
+#include "unicode/uvernum.h"
+
+namespace icing {
+namespace lib {
+
+// Returns true if the test was built with the CFStringTokenizer as the
+// implementation of LanguageSegmenter.
+inline bool IsCfStringTokenization() {
+#if defined(__APPLE__) && !defined(ICING_IOS_ICU4C_SEGMENTATION)
+ return true;
+#endif // defined(__APPLE__) && !defined(ICING_IOS_ICU4C_SEGMENTATION)
+ return false;
+}
+
+inline bool IsReverseJniTokenization() {
+#ifdef ICING_REVERSE_JNI_SEGMENTATION
+ return true;
+#endif // ICING_REVERSE_JNI_SEGMENTATION
+ return false;
+}
+
+inline bool IsIcuTokenization() {
+ return !IsReverseJniTokenization() && !IsCfStringTokenization();
+}
+
+inline int GetIcuTokenizationVersion() {
+ return IsIcuTokenization() ? U_ICU_VERSION_MAJOR_NUM : 0;
+}
+
+// Whether we're running on android_x86
+inline bool IsAndroidX86() {
+#if defined(__ANDROID__) && defined(__i386__)
+ return true;
+#endif // defined(__ANDROID__) && defined(__i386__)
+ return false;
+}
+
+// Whether we're running on android_armeabi-v7a
+inline bool IsAndroidArm() {
+#if defined(__ANDROID__) && defined(__arm__)
+ return true;
+#endif // defined(__ANDROID__) && defined(__arm__)
+ return false;
+}
+
+// Whether the running test is an iOS test.
+inline bool IsIosPlatform() {
+#if defined(__APPLE__)
+ return true;
+#endif // defined(__APPLE__)
+ return false;
+}
+
+// TODO(b/259129263): verify the flag works for different platforms.
+#if defined(__arm__) || defined(__i386__)
+#define ICING_ARCH_BIT_32
+#elif defined(__aarch64__) || defined(__x86_64__)
+#define ICING_ARCH_BIT_64
+#else
+#define ICING_ARCH_BIT_UNKNOWN
+#endif
+
+enum Architecture {
+ UNKNOWN,
+ BIT_32,
+ BIT_64,
+};
+
+// Returns which architecture we're running on.
+//
+// Architecture macros pulled from
+// https://developer.android.com/ndk/guides/cpu-features
+inline Architecture GetArchitecture() {
+#if defined(ICING_ARCH_BIT_32)
+ return BIT_32;
+#elif defined(ICING_ARCH_BIT_64)
+ return BIT_64;
+#else
+ return UNKNOWN;
+#endif
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_PORTABLE_PLATFORM_H_
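A small hypothetical sketch of how these helpers might gate platform-specific behavior; the function name and labels are illustrative only:

#include <string>

#include "icing/portable/platform.h"

// Illustrative only: pick a label for the current platform.
std::string PlatformLabel() {
  switch (icing::lib::GetArchitecture()) {
    case icing::lib::BIT_32:
      return icing::lib::IsAndroidArm() ? "android-arm32" : "other-32bit";
    case icing::lib::BIT_64:
      return "64bit";
    default:
      return "unknown";
  }
}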
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h b/icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h
new file mode 100644
index 0000000..42be07d
--- /dev/null
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h
@@ -0,0 +1,108 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_ABSTRACT_SYNTAX_TREE_TEST_UTILS_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_ABSTRACT_SYNTAX_TREE_TEST_UTILS_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+
+namespace icing {
+namespace lib {
+
+// The node types recorded by SimpleVisitor (defined below).
+enum class NodeType {
+ kFunctionName,
+ kString,
+ kText,
+ kMember,
+ kFunction,
+ kUnaryOperator,
+ kNaryOperator
+};
+
+struct NodeInfo {
+ std::string value;
+ NodeType type;
+
+ bool operator==(const NodeInfo& rhs) const {
+ return value == rhs.value && type == rhs.type;
+ }
+};
+
+MATCHER_P2(EqualsNodeInfo, value, type, "") {
+ if (arg.value != value || arg.type != type) {
+ *result_listener << "(Expected: value=\"" << value
+ << "\", type=" << static_cast<int>(type)
+ << ". Actual: value=\"" << arg.value
+ << "\", type=" << static_cast<int>(arg.type) << ")";
+ return false;
+ }
+ return true;
+}
+
+// A visitor that simply collects the visited nodes and flattens them in
+// left-side depth-first order.
+class SimpleVisitor : public AbstractSyntaxTreeVisitor {
+ public:
+ void VisitFunctionName(const FunctionNameNode* node) override {
+ nodes_.push_back({node->value(), NodeType::kFunctionName});
+ }
+ void VisitString(const StringNode* node) override {
+ nodes_.push_back({node->value(), NodeType::kString});
+ }
+ void VisitText(const TextNode* node) override {
+ nodes_.push_back({node->value(), NodeType::kText});
+ }
+ void VisitMember(const MemberNode* node) override {
+ for (const std::unique_ptr<TextNode>& child : node->children()) {
+ child->Accept(this);
+ }
+ if (node->function() != nullptr) {
+ node->function()->Accept(this);
+ }
+ nodes_.push_back({"", NodeType::kMember});
+ }
+ void VisitFunction(const FunctionNode* node) override {
+ node->function_name()->Accept(this);
+ for (const std::unique_ptr<Node>& arg : node->args()) {
+ arg->Accept(this);
+ }
+ nodes_.push_back({"", NodeType::kFunction});
+ }
+ void VisitUnaryOperator(const UnaryOperatorNode* node) override {
+ node->child()->Accept(this);
+ nodes_.push_back({node->operator_text(), NodeType::kUnaryOperator});
+ }
+ void VisitNaryOperator(const NaryOperatorNode* node) override {
+ for (const std::unique_ptr<Node>& child : node->children()) {
+ child->Accept(this);
+ }
+ nodes_.push_back({node->operator_text(), NodeType::kNaryOperator});
+ }
+
+ const std::vector<NodeInfo>& nodes() const { return nodes_; }
+
+ private:
+ std::vector<NodeInfo> nodes_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_ABSTRACT_SYNTAX_TREE_TEST_UTILS_H_
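As a concrete illustration of the post-order flattening SimpleVisitor produces, here is a hypothetical snippet (mirroring the tests in the next file) for a unary NOT query; it assumes only the classes declared above:

#include <memory>
#include <string_view>

#include "icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"

// For "NOT foo", the child is visited before the operator, so the
// flattened order is [("foo", kText), ("NOT", kUnaryOperator)].
void TraverseNotQuery() {
  std::string_view query = "NOT foo";
  auto root = std::make_unique<icing::lib::UnaryOperatorNode>(
      "NOT",
      std::make_unique<icing::lib::TextNode>("foo", query.substr(4, 3)));
  icing::lib::SimpleVisitor visitor;
  root->Accept(&visitor);
  // visitor.nodes() now holds {"foo", kText} then {"NOT", kUnaryOperator}.
}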
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree.h b/icing/query/advanced_query_parser/abstract-syntax-tree.h
new file mode 100644
index 0000000..67049ad
--- /dev/null
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree.h
@@ -0,0 +1,184 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_ABSTRACT_SYNTAX_TREE_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_ABSTRACT_SYNTAX_TREE_H_
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+class FunctionNameNode;
+class StringNode;
+class TextNode;
+class MemberNode;
+class FunctionNode;
+class UnaryOperatorNode;
+class NaryOperatorNode;
+
+class AbstractSyntaxTreeVisitor {
+ public:
+ virtual ~AbstractSyntaxTreeVisitor() = default;
+
+ virtual void VisitFunctionName(const FunctionNameNode* node) = 0;
+ virtual void VisitString(const StringNode* node) = 0;
+ virtual void VisitText(const TextNode* node) = 0;
+ virtual void VisitMember(const MemberNode* node) = 0;
+ virtual void VisitFunction(const FunctionNode* node) = 0;
+ virtual void VisitUnaryOperator(const UnaryOperatorNode* node) = 0;
+ virtual void VisitNaryOperator(const NaryOperatorNode* node) = 0;
+};
+
+class Node {
+ public:
+ virtual ~Node() = default;
+ virtual void Accept(AbstractSyntaxTreeVisitor* visitor) const = 0;
+};
+
+class TerminalNode : public Node {
+ public:
+ explicit TerminalNode(std::string value, std::string_view raw_value,
+ bool is_prefix)
+ : value_(std::move(value)),
+ raw_value_(raw_value),
+ is_prefix_(is_prefix) {}
+
+ const std::string& value() const& { return value_; }
+ std::string value() && { return std::move(value_); }
+
+ bool is_prefix() const { return is_prefix_; }
+
+ std::string_view raw_value() const { return raw_value_; }
+
+ private:
+ std::string value_;
+ std::string_view raw_value_;
+ bool is_prefix_;
+};
+
+class FunctionNameNode : public TerminalNode {
+ public:
+ explicit FunctionNameNode(std::string value)
+ : TerminalNode(std::move(value), /*raw_value=*/"", /*is_prefix=*/false) {}
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitFunctionName(this);
+ }
+};
+
+class StringNode : public TerminalNode {
+ public:
+ explicit StringNode(std::string value, std::string_view raw_value,
+ bool is_prefix = false)
+ : TerminalNode(std::move(value), raw_value, is_prefix) {}
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitString(this);
+ }
+};
+
+class TextNode : public TerminalNode {
+ public:
+ explicit TextNode(std::string value, std::string_view raw_value,
+ bool is_prefix = false)
+ : TerminalNode(std::move(value), raw_value, is_prefix) {}
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitText(this);
+ }
+};
+
+class MemberNode : public Node {
+ public:
+ explicit MemberNode(std::vector<std::unique_ptr<TextNode>> children,
+ std::unique_ptr<FunctionNode> function)
+ : children_(std::move(children)), function_(std::move(function)) {}
+
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitMember(this);
+ }
+ const std::vector<std::unique_ptr<TextNode>>& children() const {
+ return children_;
+ }
+ const FunctionNode* function() const { return function_.get(); }
+
+ private:
+ std::vector<std::unique_ptr<TextNode>> children_;
+ // This is nullable. When it is not nullptr, this node represents a
+ // function call.
+ std::unique_ptr<FunctionNode> function_;
+};
+
+class FunctionNode : public Node {
+ public:
+ explicit FunctionNode(std::unique_ptr<FunctionNameNode> function_name)
+ : FunctionNode(std::move(function_name), {}) {}
+ explicit FunctionNode(std::unique_ptr<FunctionNameNode> function_name,
+ std::vector<std::unique_ptr<Node>> args)
+ : function_name_(std::move(function_name)), args_(std::move(args)) {}
+
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitFunction(this);
+ }
+ const FunctionNameNode* function_name() const { return function_name_.get(); }
+ const std::vector<std::unique_ptr<Node>>& args() const { return args_; }
+
+ private:
+ std::unique_ptr<FunctionNameNode> function_name_;
+ std::vector<std::unique_ptr<Node>> args_;
+};
+
+class UnaryOperatorNode : public Node {
+ public:
+ explicit UnaryOperatorNode(std::string operator_text,
+ std::unique_ptr<Node> child)
+ : operator_text_(std::move(operator_text)), child_(std::move(child)) {}
+
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitUnaryOperator(this);
+ }
+ const std::string& operator_text() const { return operator_text_; }
+ const Node* child() const { return child_.get(); }
+
+ private:
+ std::string operator_text_;
+ std::unique_ptr<Node> child_;
+};
+
+class NaryOperatorNode : public Node {
+ public:
+ explicit NaryOperatorNode(std::string operator_text,
+ std::vector<std::unique_ptr<Node>> children)
+ : operator_text_(std::move(operator_text)),
+ children_(std::move(children)) {}
+
+ void Accept(AbstractSyntaxTreeVisitor* visitor) const override {
+ visitor->VisitNaryOperator(this);
+ }
+ const std::string& operator_text() const { return operator_text_; }
+ const std::vector<std::unique_ptr<Node>>& children() const {
+ return children_;
+ }
+
+ private:
+ std::string operator_text_;
+ std::vector<std::unique_ptr<Node>> children_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_ABSTRACT_SYNTAX_TREE_H_
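Because every node dispatches through AbstractSyntaxTreeVisitor, new traversals can be added without touching the node classes. A hypothetical sketch of a visitor that counts TEXT terminals, assuming only the interface declared above:

#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"

namespace icing {
namespace lib {

// Hypothetical visitor that counts TEXT terminals in a query tree.
class TextCountingVisitor : public AbstractSyntaxTreeVisitor {
 public:
  void VisitFunctionName(const FunctionNameNode*) override {}
  void VisitString(const StringNode*) override {}
  void VisitText(const TextNode*) override { ++count_; }
  void VisitMember(const MemberNode* node) override {
    for (const auto& child : node->children()) child->Accept(this);
    if (node->function() != nullptr) node->function()->Accept(this);
  }
  void VisitFunction(const FunctionNode* node) override {
    for (const auto& arg : node->args()) arg->Accept(this);
  }
  void VisitUnaryOperator(const UnaryOperatorNode* node) override {
    node->child()->Accept(this);
  }
  void VisitNaryOperator(const NaryOperatorNode* node) override {
    for (const auto& child : node->children()) child->Accept(this);
  }
  int count() const { return count_; }

 private:
  int count_ = 0;
};

}  // namespace lib
}  // namespace icing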
diff --git a/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc b/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
new file mode 100644
index 0000000..5e28278
--- /dev/null
+++ b/icing/query/advanced_query_parser/abstract-syntax-tree_test.cc
@@ -0,0 +1,143 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+
+#include <memory>
+#include <string_view>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::ElementsAre;
+
+TEST(AbstractSyntaxTreeTest, Simple) {
+ std::string_view query = "foo";
+ std::unique_ptr<Node> root = std::make_unique<TextNode>("foo", query);
+ SimpleVisitor visitor;
+ root->Accept(&visitor);
+
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText)));
+}
+
+TEST(AbstractSyntaxTreeTest, Composite) {
+ std::string_view query = "(foo bar) OR baz";
+ std::vector<std::unique_ptr<Node>> and_args;
+ and_args.push_back(std::make_unique<TextNode>("foo", query.substr(1, 3)));
+ and_args.push_back(std::make_unique<TextNode>("bar", query.substr(5, 3)));
+ auto and_node =
+ std::make_unique<NaryOperatorNode>("AND", std::move(and_args));
+
+ std::vector<std::unique_ptr<Node>> or_args;
+ or_args.push_back(std::move(and_node));
+ or_args.push_back(std::make_unique<TextNode>("baz", query.substr(13, 3)));
+ std::unique_ptr<Node> root =
+ std::make_unique<NaryOperatorNode>("OR", std::move(or_args));
+
+ SimpleVisitor visitor;
+ root->Accept(&visitor);
+
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(AbstractSyntaxTreeTest, Function) {
+ // foo()
+ std::unique_ptr<Node> root =
+ std::make_unique<FunctionNode>(std::make_unique<FunctionNameNode>("foo"));
+ SimpleVisitor visitor;
+ root->Accept(&visitor);
+
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction)));
+
+ std::string_view query = "foo(\"bar\")";
+ std::vector<std::unique_ptr<Node>> args;
+ args.push_back(std::make_unique<StringNode>("bar", query.substr(5, 3)));
+ root = std::make_unique<FunctionNode>(
+ std::make_unique<FunctionNameNode>("foo"), std::move(args));
+ visitor = SimpleVisitor();
+ root->Accept(&visitor);
+
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction)));
+
+ query = "foo(bar(\"baz\"))";
+ std::vector<std::unique_ptr<Node>> inner_args;
+ inner_args.push_back(std::make_unique<StringNode>("baz", query.substr(9, 3)));
+ args.clear();
+ args.push_back(std::make_unique<FunctionNode>(
+ std::make_unique<FunctionNameNode>("bar"), std::move(inner_args)));
+ root = std::make_unique<FunctionNode>(
+ std::make_unique<FunctionNameNode>("foo"), std::move(args));
+ visitor = SimpleVisitor();
+ root->Accept(&visitor);
+
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kFunctionName),
+ EqualsNodeInfo("baz", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(AbstractSyntaxTreeTest, Restriction) {
+ std::string_view query = "sender.name:(IMPORTANT OR URGENT)";
+ std::vector<std::unique_ptr<TextNode>> member_args;
+ member_args.push_back(
+ std::make_unique<TextNode>("sender", query.substr(0, 6)));
+ member_args.push_back(std::make_unique<TextNode>("name", query.substr(7, 4)));
+
+ std::vector<std::unique_ptr<Node>> or_args;
+ or_args.push_back(
+ std::make_unique<TextNode>("IMPORTANT", query.substr(13, 9)));
+ or_args.push_back(std::make_unique<TextNode>("URGENT", query.substr(26, 6)));
+
+ std::vector<std::unique_ptr<Node>> has_args;
+ has_args.push_back(std::make_unique<MemberNode>(std::move(member_args),
+ /*function=*/nullptr));
+ has_args.push_back(
+ std::make_unique<NaryOperatorNode>("OR", std::move(or_args)));
+
+ std::unique_ptr<Node> root =
+ std::make_unique<NaryOperatorNode>(":", std::move(has_args));
+
+ SimpleVisitor visitor;
+ root->Accept(&visitor);
+
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("sender", NodeType::kText),
+ EqualsNodeInfo("name", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("IMPORTANT", NodeType::kText),
+ EqualsNodeInfo("URGENT", NodeType::kText),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator),
+ EqualsNodeInfo(":", NodeType::kNaryOperator)));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/function.cc b/icing/query/advanced_query_parser/function.cc
new file mode 100644
index 0000000..e7938db
--- /dev/null
+++ b/icing/query/advanced_query_parser/function.cc
@@ -0,0 +1,77 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/function.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/*static*/ libtextclassifier3::StatusOr<Function> Function::Create(
+ DataType return_type, std::string name, std::vector<Param> params,
+ Function::EvalFunction eval) {
+ bool has_had_optional = false;
+ for (int i = 0; i < params.size(); ++i) {
+ switch (params.at(i).cardinality) {
+ case Cardinality::kVariable:
+ if (i != params.size() - 1) {
+ return absl_ports::InvalidArgumentError(
+ "Can only specify a variable param as the final param.");
+ }
+ break;
+ case Cardinality::kOptional:
+ has_had_optional = true;
+ break;
+ case Cardinality::kRequired:
+ if (has_had_optional) {
+ return absl_ports::InvalidArgumentError(
+ "Can't specify optional params followed by required params.");
+ }
+ break;
+ }
+ }
+ return Function(return_type, std::move(name), std::move(params),
+ std::move(eval));
+}
+
+libtextclassifier3::StatusOr<PendingValue> Function::Eval(
+ std::vector<PendingValue>&& args) const {
+ for (int i = 0; i < params_.size() || i < args.size(); ++i) {
+ if (i < args.size() && i < params_.size()) {
+ ICING_RETURN_IF_ERROR(params_.at(i).Matches(args.at(i)));
+ } else if (i >= params_.size()) {
+ // There are remaining args. This is only valid if the final param is
+ // kVariable.
+ if (params_.empty() ||
+ params_.rbegin()->cardinality != Cardinality::kVariable) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Expected to find only ", std::to_string(params_.size()),
+ " arguments, but found ", std::to_string(args.size())));
+ }
+ ICING_RETURN_IF_ERROR(params_.rbegin()->Matches(args.at(i)));
+ } else if (params_.at(i).cardinality == Cardinality::kRequired) {
+ // There are no more args, but there are still params to check. If these
+ // params are kRequired, then there is an error.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Expected to find ", std::to_string(i + 1), "th argument, but only ",
+ std::to_string(args.size()), " arguments provided."));
+ }
+ }
+ return eval_(std::move(args));
+}
+
+} // namespace lib
+} // namespace icing
\ No newline at end of file
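To make the Create()/Eval() contract concrete, a hypothetical registration sketch: a function named prop() taking one required string followed by zero or more further strings (a trailing kVariable param), whose evaluator simply returns a placeholder. The name prop and the helper are illustrative only; the types come from function.h, param.h, and pending-value.h:

#include <utility>
#include <vector>

#include "icing/query/advanced_query_parser/function.h"
#include "icing/query/advanced_query_parser/param.h"
#include "icing/query/advanced_query_parser/pending-value.h"

namespace icing {
namespace lib {

// Hypothetical: prop("name", values...) with a trailing kVariable param.
libtextclassifier3::StatusOr<Function> MakePropFunction() {
  return Function::Create(
      /*return_type=*/DataType::kString, "prop",
      /*params=*/
      {Param(DataType::kString, Cardinality::kRequired),
       Param(DataType::kString, Cardinality::kVariable)},
      [](std::vector<PendingValue>&& args)
          -> libtextclassifier3::StatusOr<PendingValue> {
        // A real evaluator would inspect `args`; this one just succeeds.
        return PendingValue();
      });
}

}  // namespace lib
}  // namespace icing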
diff --git a/icing/query/advanced_query_parser/function.h b/icing/query/advanced_query_parser/function.h
new file mode 100644
index 0000000..3514878
--- /dev/null
+++ b/icing/query/advanced_query_parser/function.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+
+namespace icing {
+namespace lib {
+
+class Function {
+ public:
+ using EvalFunction = std::function<libtextclassifier3::StatusOr<PendingValue>(
+ std::vector<PendingValue>&&)>;
+
+ static libtextclassifier3::StatusOr<Function> Create(
+ DataType return_type, std::string name, std::vector<Param> params,
+ EvalFunction eval);
+
+ Function(const Function& rhs) = default;
+ Function(Function&& rhs) = default;
+
+ Function& operator=(const Function& rhs) = default;
+ Function& operator=(Function&& rhs) = default;
+
+ const std::string& name() const { return name_; }
+
+ libtextclassifier3::StatusOr<PendingValue> Eval(
+ std::vector<PendingValue>&& args) const;
+
+ private:
+ Function(DataType return_type, std::string name, std::vector<Param> params,
+ EvalFunction eval)
+ : name_(std::move(name)),
+ params_(std::move(params)),
+ eval_(std::move(eval)),
+ return_type_(return_type) {}
+
+ std::string name_;
+ std::vector<Param> params_;
+ EvalFunction eval_;
+ DataType return_type_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_FUNCTION_H_
diff --git a/icing/query/advanced_query_parser/function_test.cc b/icing/query/advanced_query_parser/function_test.cc
new file mode 100644
index 0000000..afd4e04
--- /dev/null
+++ b/icing/query/advanced_query_parser/function_test.cc
@@ -0,0 +1,332 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/function.h"
+
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::IsTrue;
+
+struct TrivialEval {
+ libtextclassifier3::StatusOr<PendingValue> operator()(
+ const std::vector<PendingValue>&) const {
+ return PendingValue();
+ }
+};
+
+TEST(FunctionTest, NoParamCreateSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(/*return_type=*/DataType::kString,
+ "foo", /*params=*/{}, TrivialEval()));
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, NoParamNonEmptyArgsFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(/*return_type=*/DataType::kString,
+ "foo", /*params=*/{}, TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue());
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamNotWrongTypeFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+ std::string_view query = "foo(bar)";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"bar", query.substr(4, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamRequiredArgSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+
+ std::string_view query = R"(foo("bar"))";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, ParamRequiredArgNotPresentFails) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(/*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString)}, TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ EXPECT_THAT(function.Eval(std::move(empty_args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, ParamOptionalArgNotPresentSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, ParamVariableArgNotPresentSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function,
+ Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/{Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> empty_args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val,
+ function.Eval(std::move(empty_args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsTrailingOptionalSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()));
+
+ std::string_view query = R"(foo("bar"))";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", "baz"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsTrailingVariableSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()));
+
+ std::string_view query = R"(foo("bar"))";
+ std::vector<PendingValue> args;
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", "baz"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", "baz", "bat"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"baz", query.substr(12, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bat", query.substr(19, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeRequiredFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kString, Cardinality::kRequired)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeOptionalSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kText, Cardinality::kOptional)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ std::string_view query = R"(foo("bar"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo(baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsOptionalBeforeVariableSucceeds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ Function function, Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kOptional),
+ Param(DataType::kText, Cardinality::kVariable)},
+ TrivialEval()));
+
+ // foo()
+ std::vector<PendingValue> args;
+ ICING_ASSERT_OK_AND_ASSIGN(PendingValue val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ std::string_view query = R"(foo("bar"))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo("bar", baz, bat))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateStringPendingValue(
+ QueryTerm{"bar", query.substr(5, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(11, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"bat", query.substr(16, 3), /*is_prefix_val=*/false}));
+ ICING_ASSERT_OK_AND_ASSIGN(val, function.Eval(std::move(args)));
+ EXPECT_THAT(val.is_placeholder(), IsTrue());
+
+ query = R"(foo(baz))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ query = R"(foo(baz, bat))";
+ args = std::vector<PendingValue>();
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"baz", query.substr(4, 3), /*is_prefix_val=*/false}));
+ args.push_back(PendingValue::CreateTextPendingValue(
+ QueryTerm{"bat", query.substr(9, 3), /*is_prefix_val=*/false}));
+ EXPECT_THAT(function.Eval(std::move(args)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeRequiredFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kRequired)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeOptionalFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kOptional)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(FunctionTest, MultipleArgsVariableBeforeVariableFails) {
+ EXPECT_THAT(Function::Create(
+ /*return_type=*/DataType::kString, "foo",
+ /*params=*/
+ {Param(DataType::kString, Cardinality::kVariable),
+ Param(DataType::kString, Cardinality::kVariable)},
+ TrivialEval()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
\ No newline at end of file
diff --git a/icing/query/advanced_query_parser/lexer.cc b/icing/query/advanced_query_parser/lexer.cc
new file mode 100644
index 0000000..0dd0bb0
--- /dev/null
+++ b/icing/query/advanced_query_parser/lexer.cc
@@ -0,0 +1,270 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/lexer.h"
+
+#include <string>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/util/i18n-utils.h"
+
+namespace icing {
+namespace lib {
+
+bool Lexer::ConsumeWhitespace() {
+ if (current_char_ == '\0') {
+ return false;
+ }
+ if (i18n_utils::IsWhitespaceAt(query_, current_index_)) {
+ UChar32 uchar32 = i18n_utils::GetUChar32At(query_.data(), query_.length(),
+ current_index_);
+ int length = i18n_utils::GetUtf8Length(uchar32);
+ Advance(length);
+ return true;
+ }
+ return false;
+}
+
+bool Lexer::ConsumeQuerySingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
+ switch (current_char_) {
+ case ':':
+ tokens_.push_back({":", original_text, TokenType::COMPARATOR});
+ break;
+ case '*':
+ tokens_.push_back({"", original_text, TokenType::STAR});
+ break;
+ case '-':
+ if (in_text_) {
+ // MINUS ('-') is considered to be a part of a text segment if it is
+ // in the middle of a TEXT segment (ex. `foo-bar`).
+ return false;
+ }
+ tokens_.push_back({"", original_text, TokenType::MINUS});
+ break;
+ default:
+ return false;
+ }
+ Advance();
+ return true;
+}
+
+bool Lexer::ConsumeScoringSingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
+ switch (current_char_) {
+ case '+':
+ tokens_.push_back({"", original_text, TokenType::PLUS});
+ break;
+ case '*':
+ tokens_.push_back({"", original_text, TokenType::TIMES});
+ break;
+ case '/':
+ tokens_.push_back({"", original_text, TokenType::DIV});
+ break;
+ case '-':
+ tokens_.push_back({"", original_text, TokenType::MINUS});
+ break;
+ default:
+ return false;
+ }
+ Advance();
+ return true;
+}
+
+bool Lexer::ConsumeGeneralSingleChar() {
+ std::string_view original_text = query_.substr(current_index_, 1);
+ switch (current_char_) {
+ case ',':
+ tokens_.push_back({"", original_text, TokenType::COMMA});
+ break;
+ case '.':
+ tokens_.push_back({"", original_text, TokenType::DOT});
+ break;
+ case '(':
+ tokens_.push_back({"", original_text, TokenType::LPAREN});
+ break;
+ case ')':
+ tokens_.push_back({"", original_text, TokenType::RPAREN});
+ break;
+ default:
+ return false;
+ }
+ Advance();
+ return true;
+}
+
+bool Lexer::ConsumeSingleChar() {
+ if (language_ == Language::QUERY) {
+ if (ConsumeQuerySingleChar()) {
+ return true;
+ }
+ } else if (language_ == Language::SCORING) {
+ if (ConsumeScoringSingleChar()) {
+ return true;
+ }
+ }
+ return ConsumeGeneralSingleChar();
+}
+
+bool Lexer::ConsumeComparator() {
+ if (current_char_ != '<' && current_char_ != '>' && current_char_ != '!' &&
+ current_char_ != '=') {
+ return false;
+ }
+ // Now, current_char_ must be one of '<', '>', '!', or '='.
+ // Matching for '<=', '>=', '!=', or '=='.
+ char next_char = PeekNext(1);
+ if (next_char == '=') {
+ tokens_.push_back({{current_char_, next_char},
+ query_.substr(current_index_, 2),
+ TokenType::COMPARATOR});
+ Advance(2);
+ return true;
+ }
+ // Now, next_char must not be '='. Let's match for '<' and '>'.
+ if (current_char_ == '<' || current_char_ == '>') {
+ tokens_.push_back({{current_char_},
+ query_.substr(current_index_, 1),
+ TokenType::COMPARATOR});
+ Advance();
+ return true;
+ }
+ return false;
+}
+
+bool Lexer::ConsumeAndOr() {
+ if (current_char_ != '&' && current_char_ != '|') {
+ return false;
+ }
+ char next_char = PeekNext(1);
+ if (current_char_ != next_char) {
+ return false;
+ }
+ std::string_view original_text = query_.substr(current_index_, 2);
+ if (current_char_ == '&') {
+ tokens_.push_back({"", original_text, TokenType::AND});
+ } else {
+ tokens_.push_back({"", original_text, TokenType::OR});
+ }
+ Advance(2);
+ return true;
+}
+
+bool Lexer::ConsumeStringLiteral() {
+ if (current_char_ != '"') {
+ return false;
+ }
+ Advance();
+ int32_t unnormalized_start_pos = current_index_;
+ while (current_char_ != '\0' && current_char_ != '"') {
+ // When getting a backslash, we will always match the next character, even
+ // if the next character is a quotation mark.
+ if (current_char_ == '\\') {
+ Advance();
+ if (current_char_ == '\0') {
+ // In this case, we are missing a terminating quotation mark.
+ break;
+ }
+ }
+ Advance();
+ }
+ if (current_char_ == '\0') {
+ SyntaxError("missing terminating \" character");
+ return false;
+ }
+ int32_t unnormalized_length = current_index_ - unnormalized_start_pos;
+ std::string_view raw_token_text =
+ query_.substr(unnormalized_start_pos, unnormalized_length);
+ std::string token_text(raw_token_text);
+ tokens_.push_back({std::move(token_text), raw_token_text, TokenType::STRING});
+ Advance();
+ return true;
+}
+
+bool Lexer::ConsumeText() {
+ if (current_char_ == '\0') {
+ return false;
+ }
+ tokens_.push_back({"", query_.substr(current_index_, 0), TokenType::TEXT});
+ int token_index = tokens_.size() - 1;
+
+ int32_t unnormalized_start_pos = current_index_;
+ int32_t unnormalized_end_pos = current_index_;
+ while (!ConsumeNonText() && current_char_ != '\0') {
+ in_text_ = true;
+ // When getting a backslash in TEXT, unescape it by accepting the following
+ // character no matter what it is, including whitespace, operator symbols,
+ // parentheses, etc.
+ if (current_char_ == '\\') {
+ Advance();
+ if (current_char_ == '\0') {
+ SyntaxError("missing a escaping character after \\");
+ break;
+ }
+ }
+ tokens_[token_index].text.push_back(current_char_);
+ Advance();
+ unnormalized_end_pos = current_index_;
+ }
+ in_text_ = false;
+
+ tokens_[token_index].original_text = query_.substr(
+ unnormalized_start_pos, unnormalized_end_pos - unnormalized_start_pos);
+ if (unnormalized_end_pos < query_.length() &&
+ query_[unnormalized_end_pos] == '(') {
+ // A TEXT followed by a LPAREN is a FUNCTION_NAME.
+ tokens_[token_index].type = TokenType::FUNCTION_NAME;
+ }
+
+ if (language_ == Lexer::Language::QUERY) {
+ std::string &text = tokens_[token_index].text;
+ TokenType &type = tokens_[token_index].type;
+ if (text == "AND") {
+ text.clear();
+ type = TokenType::AND;
+ } else if (text == "OR") {
+ text.clear();
+ type = TokenType::OR;
+ } else if (text == "NOT") {
+ text.clear();
+ type = TokenType::NOT;
+ }
+ }
+ return true;
+}
+
+libtextclassifier3::StatusOr<std::vector<Lexer::LexerToken>>
+Lexer::ExtractTokens() {
+ while (current_char_ != '\0') {
+ // Clear out any non-text before matching a Text.
+ while (ConsumeNonText()) {
+ }
+ ConsumeText();
+ }
+ if (!error_.empty()) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Syntax Error: ", error_));
+ }
+ if (tokens_.size() > kMaxNumTokens) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("The maximum number of tokens allowed is ",
+ std::to_string(kMaxNumTokens), ", but got ",
+ std::to_string(tokens_.size()), " tokens."));
+ }
+ return tokens_;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/lexer.h b/icing/query/advanced_query_parser/lexer.h
new file mode 100644
index 0000000..b313fa7
--- /dev/null
+++ b/icing/query/advanced_query_parser/lexer.h
@@ -0,0 +1,169 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_LEXER_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_LEXER_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+class Lexer {
+ public:
+ enum class Language { QUERY, SCORING };
+
+ // The maximum number of tokens allowed, in order to prevent stack overflow
+ // issues in the parsers or visitors.
+ static constexpr uint32_t kMaxNumTokens = 2048;
+
+ enum class TokenType {
+ COMMA, // ','
+ DOT, // '.'
+ PLUS, // '+' Not allowed in QUERY language.
+ MINUS, // '-'
+ STAR, // '*' Not allowed in SCORING language.
+ TIMES, // '*' Not allowed in QUERY language.
+ DIV, // '/' Not allowed in QUERY language.
+ LPAREN, // '('
+ RPAREN, // ')'
+ COMPARATOR, // '<=' | '<' | '>=' | '>' | '!=' | '==' | ':'
+ // Not allowed in SCORING language.
+ AND, // 'AND' | '&&' Not allowed in SCORING language.
+ OR, // 'OR' | '||' Not allowed in SCORING language.
+ NOT, // 'NOT' Not allowed in SCORING language.
+ STRING, // String literal surrounded by quotation marks. The
+ // original_text of a STRING token will not include quotation
+ // marks.
+ TEXT, // A sequence of chars that are not any above-listed operator
+ FUNCTION_NAME, // A TEXT followed by LPAREN.
+ // Whitespaces not inside a string literal will be skipped.
+ // WS: " " | "\t" | "\n" | "\r" | "\f" -> skip ;
+ };
+
+ struct LexerToken {
+ // For STRING, text will contain the raw original text of the token
+ // in between quotation marks, without unescaping.
+ //
+ // For TEXT, text will contain the text of the token after unescaping all
+ // escaped characters.
+ //
+ // For FUNCTION_NAME, this field will contain the name of the function.
+ //
+ // For COMPARATOR, this field will contain the comparator.
+ //
+ // For other types, this field will be empty.
+ std::string text;
+
+ // Lifecycle is dependent on the lifecycle of the string pointed to by
+ // query_.
+ std::string_view original_text;
+
+ // The type of the token.
+ TokenType type;
+ };
+
+ explicit Lexer(std::string_view query, Language language)
+ : query_(query), language_(language) {
+ Advance();
+ }
+
+ // Get a vector of LexerToken after lexing the query given in the constructor.
+ //
+ // Returns:
+ // A vector of LexerToken on success
+ // INVALID_ARGUMENT on syntax error.
+ libtextclassifier3::StatusOr<std::vector<LexerToken>> ExtractTokens();
+
+ private:
+ // Advance to current_index_ + n.
+ void Advance(uint32_t n = 1) {
+ if (current_index_ + n >= query_.size()) {
+ current_index_ = query_.size();
+ current_char_ = '\0';
+ } else {
+ current_index_ += n;
+ current_char_ = query_[current_index_];
+ }
+ }
+
+ // Get the character at current_index_ + n.
+ char PeekNext(uint32_t n = 1) {
+ if (current_index_ + n >= query_.size()) {
+ return '\0';
+ } else {
+ return query_[current_index_ + n];
+ }
+ }
+
+ void SyntaxError(std::string error) {
+ current_index_ = query_.size();
+ current_char_ = '\0';
+ error_ = std::move(error);
+ }
+
+ // Try to match a whitespace token and skip it.
+ bool ConsumeWhitespace();
+
+ // Try to match a single-char token other than '<' and '>'.
+ bool ConsumeSingleChar();
+ bool ConsumeQuerySingleChar();
+ bool ConsumeScoringSingleChar();
+ bool ConsumeGeneralSingleChar();
+
+ // Try to match a comparator token other than ':'.
+ bool ConsumeComparator();
+
+ // Try to match '&&' and '||'.
+ // 'AND' and 'OR' will be handled in ConsumeText() instead, so that
+ // 'ANDfoo' and 'fooOR' are treated as TEXT rather than as 'AND' or 'OR'.
+ bool ConsumeAndOr();
+
+ // Try to match a string literal.
+ bool ConsumeStringLiteral();
+
+ // Try to match a non-text.
+ bool ConsumeNonText() {
+ return ConsumeWhitespace() || ConsumeSingleChar() ||
+ (language_ == Language::QUERY && ConsumeComparator()) ||
+ (language_ == Language::QUERY && ConsumeAndOr()) ||
+ ConsumeStringLiteral();
+ }
+
+ // Try to match TEXT, FUNCTION_NAME, 'AND', 'OR' and 'NOT'.
+ // REQUIRES: ConsumeNonText() must be called immediately before calling this
+ // function.
+ bool ConsumeText();
+
+ std::string_view query_;
+ std::string error_;
+ Language language_;
+ int32_t current_index_ = -1;
+ char current_char_ = '\0';
+ std::vector<LexerToken> tokens_;
+
+ // Stores whether the lexer is currently inspecting a TEXT segment while
+ // handling current_char_.
+ bool in_text_ = false;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_LEXER_H_
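A hypothetical end-to-end usage sketch for the lexer, following the ExtractTokens() contract documented above. With the QUERY language, a query like "price > 10" lexes to TEXT("price"), COMPARATOR(">"), TEXT("10"). The wrapper name is illustrative only:

#include <string_view>
#include <vector>

#include "icing/query/advanced_query_parser/lexer.h"

namespace icing {
namespace lib {

// Lexes `query` with the QUERY language rules.
libtextclassifier3::StatusOr<std::vector<Lexer::LexerToken>> TokenizeQuery(
    std::string_view query) {
  Lexer lexer(query, Lexer::Language::QUERY);
  // Returns INVALID_ARGUMENT on syntax errors or too many tokens.
  return lexer.ExtractTokens();
}

}  // namespace lib
}  // namespace icing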
diff --git a/icing/query/advanced_query_parser/lexer_fuzz_test.cc b/icing/query/advanced_query_parser/lexer_fuzz_test.cc
new file mode 100644
index 0000000..f9190db
--- /dev/null
+++ b/icing/query/advanced_query_parser/lexer_fuzz_test.cc
@@ -0,0 +1,37 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string_view>
+
+#include "icing/query/advanced_query_parser/lexer.h"
+
+namespace icing {
+namespace lib {
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ std::string_view text(reinterpret_cast<const char*>(data), size);
+
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>(text, Lexer::Language::QUERY);
+ lexer->ExtractTokens();
+
+ lexer = std::make_unique<Lexer>(text, Lexer::Language::SCORING);
+ lexer->ExtractTokens();
+ return 0;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/lexer_test.cc b/icing/query/advanced_query_parser/lexer_test.cc
new file mode 100644
index 0000000..ec0e663
--- /dev/null
+++ b/icing/query/advanced_query_parser/lexer_test.cc
@@ -0,0 +1,698 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/lexer.h"
+
+#include <memory>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+using ::testing::ElementsAre;
+
+MATCHER_P2(EqualsLexerToken, text, type, "") {
+ const Lexer::LexerToken& actual = arg;
+ *result_listener << "actual is {text=" << actual.text
+ << ", type=" << static_cast<int>(actual.type)
+ << "}, but expected was {text=" << text
+ << ", type=" << static_cast<int>(type) << "}.";
+ return actual.text == text && actual.type == type;
+}
+
+MATCHER_P(EqualsLexerToken, type, "") {
+ const Lexer::LexerToken& actual = arg;
+ *result_listener << "actual is {text=" << actual.text
+ << ", type=" << static_cast<int>(actual.type)
+ << "}, but expected was {text=(empty), type="
+ << static_cast<int>(type) << "}.";
+ return actual.text.empty() && actual.type == type;
+}
+
+TEST(LexerTest, SimpleQuery) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("fooAND", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("fooAND", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("ORfoo", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("ORfoo", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("fooANDbar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("fooANDbar",
+ Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, PrefixQuery) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo*", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
+
+ lexer = std::make_unique<Lexer>("fooAND*", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("fooAND", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
+
+ lexer = std::make_unique<Lexer>("*ORfoo", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken("ORfoo", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("fooANDbar*", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("fooANDbar", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
+}
+
+TEST(LexerTest, SimpleStringQuery) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("\"foo\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::STRING)));
+
+ lexer = std::make_unique<Lexer>("\"fooAND\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("fooAND",
+ Lexer::TokenType::STRING)));
+
+ lexer = std::make_unique<Lexer>("\"ORfoo\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("ORfoo", Lexer::TokenType::STRING)));
+
+ lexer = std::make_unique<Lexer>("\"fooANDbar\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("fooANDbar",
+ Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, TwoTermQuery) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo AND bar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo && bar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo&&bar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo OR \"bar\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("bar", Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, QueryWithSpecialSymbol) {
+ // With escaping
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo\\ \\&\\&bar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("foo &&bar",
+ Lexer::TokenType::TEXT)));
+ lexer = std::make_unique<Lexer>("foo\\&\\&bar&&baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo&&bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("baz", Lexer::TokenType::TEXT)));
+ lexer = std::make_unique<Lexer>("foo\\\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo\"", Lexer::TokenType::TEXT)));
+
+ // With quotation marks
+ lexer = std::make_unique<Lexer>("\"foo &&bar\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("foo &&bar",
+ Lexer::TokenType::STRING)));
+ lexer = std::make_unique<Lexer>("\"foo&&bar\"&&baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsLexerToken("foo&&bar", Lexer::TokenType::STRING),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("baz", Lexer::TokenType::TEXT)));
+ lexer = std::make_unique<Lexer>("\"foo\\\"\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("foo\\\"",
+ Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, TextInStringShouldBeOriginal) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("\"foo\\nbar\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("foo\\nbar",
+ Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, QueryWithFunctionCalls) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo AND fun(bar)", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("fun", Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN)));
+
+ // Not a function call
+ lexer = std::make_unique<Lexer>("foo AND fun (bar)", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("fun", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN)));
+}
+
+TEST(LexerTest, QueryWithComparator) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("name: foo", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("name", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("foo", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("email.name:foo", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("email", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::DOT),
+ EqualsLexerToken("name", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("foo", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age > 20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken(">", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age>=20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken(">=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age <20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken("<", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age<= 20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken("<=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age == 20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken("==", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age != 20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken("!=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("20", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, ComplexQuery) {
+ std::unique_ptr<Lexer> lexer = std::make_unique<Lexer>(
+ "email.sender: (foo* AND bar OR pow(age, 2)>100) || (-baz foo) && "
+ "NOT verbatimSearch(\"hello world\")",
+ Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(
+ EqualsLexerToken("email", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::DOT),
+ EqualsLexerToken("sender", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("pow", Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::COMMA),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(">", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("100", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken(Lexer::TokenType::MINUS),
+ EqualsLexerToken("baz", Lexer::TokenType::TEXT),
+ EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken(Lexer::TokenType::NOT),
+ EqualsLexerToken("verbatimSearch", Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("hello world", Lexer::TokenType::STRING),
+ EqualsLexerToken(Lexer::TokenType::RPAREN)));
+}
+
+TEST(LexerTest, UTF8WhiteSpace) {
+ std::unique_ptr<Lexer> lexer = std::make_unique<Lexer>(
+ "\xe2\x80\x88"
+ "foo"
+ "\xe2\x80\x89"
+ "\xe2\x80\x89"
+ "bar"
+ "\xe2\x80\x8a",
+ Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, CJKT) {
+ std::unique_ptr<Lexer> lexer = std::make_unique<Lexer>(
+ "我 && 每天 || 走路 OR 去 -上班", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("我", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("每天", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("走路", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("去", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::MINUS),
+ EqualsLexerToken("上班", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("私&& は ||毎日 AND 仕事 -に 歩い て い ます",
+ Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("私", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("は", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("毎日", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("仕事", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::MINUS),
+ EqualsLexerToken("に", Lexer::TokenType::TEXT),
+ EqualsLexerToken("歩い", Lexer::TokenType::TEXT),
+ EqualsLexerToken("て", Lexer::TokenType::TEXT),
+ EqualsLexerToken("い", Lexer::TokenType::TEXT),
+ EqualsLexerToken("ます", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("ញុំ&&ដើរទៅ||ធ្វើការ-រាល់ថ្ងៃ",
+ Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsLexerToken("ញុំ", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("ដើរទៅ", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("ធ្វើការ-រាល់ថ្ងៃ", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>(
+ "나는"
+ "\xe2\x80\x88" // White Space
+ "매일"
+ "\xe2\x80\x89" // White Space
+ "출근합니다",
+ Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsLexerToken("나는", Lexer::TokenType::TEXT),
+ EqualsLexerToken("매일", Lexer::TokenType::TEXT),
+ EqualsLexerToken("출근합니다", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, SyntaxError) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("\"foo", Lexer::Language::QUERY);
+ EXPECT_THAT(lexer->ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ lexer = std::make_unique<Lexer>("\"foo\\", Lexer::Language::QUERY);
+ EXPECT_THAT(lexer->ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ lexer = std::make_unique<Lexer>("foo\\", Lexer::Language::QUERY);
+ EXPECT_THAT(lexer->ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// "!", "=", "&" and "|" should be treated as valid symbols in TEXT, if not
+// matched as "!=", "==", "&&", or "||".
+TEST(LexerTest, SpecialSymbolAsText) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("age=20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age=20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("age !20", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("age", Lexer::TokenType::TEXT),
+ EqualsLexerToken("!20", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo& bar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo&", Lexer::TokenType::TEXT),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo | bar", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("|", Lexer::TokenType::TEXT),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, ScoringArithmetic) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("1 + 2", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::PLUS),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1+2*3/4", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::PLUS),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::TIMES),
+ EqualsLexerToken("3", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::DIV),
+ EqualsLexerToken("4", Lexer::TokenType::TEXT)));
+
+  // Arithmetic operators are not produced in the query language.
+ lexer = std::make_unique<Lexer>("1 + 2", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken("+", Lexer::TokenType::TEXT),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1+2*3/4", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1+2", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken("3/4", Lexer::TokenType::TEXT)));
+}
+
+// Currently, in the scoring language, the lexer treats these logic operators
+// as TEXTs. In the future, they may be rejected instead.
+TEST(LexerTest, LogicOperatorNotInScoring) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("1 && 2", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken("&&", Lexer::TokenType::TEXT),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1&&2", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1&&2", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1&&2 ||3", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1&&2", Lexer::TokenType::TEXT),
+ EqualsLexerToken("||3", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1 AND 2 OR 3 AND NOT 4",
+ Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken("AND", Lexer::TokenType::TEXT),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT),
+ EqualsLexerToken("OR", Lexer::TokenType::TEXT),
+ EqualsLexerToken("3", Lexer::TokenType::TEXT),
+ EqualsLexerToken("AND", Lexer::TokenType::TEXT),
+ EqualsLexerToken("NOT", Lexer::TokenType::TEXT),
+ EqualsLexerToken("4", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, ComparatorNotInScoring) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("1 > 2", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken(">", Lexer::TokenType::TEXT),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1>2", Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1>2", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("1>2>=3 <= 4:5== 6<7<=8!= 9",
+ Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1>2>=3", Lexer::TokenType::TEXT),
+ EqualsLexerToken("<=", Lexer::TokenType::TEXT),
+ EqualsLexerToken("4:5==", Lexer::TokenType::TEXT),
+ EqualsLexerToken("6<7<=8!=", Lexer::TokenType::TEXT),
+ EqualsLexerToken("9", Lexer::TokenType::TEXT)));
+
+  // Comparators are produced in the query language.
+ lexer = std::make_unique<Lexer>("1>2>=3 <= 4:5== 6<7<=8!= 9",
+ Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken(">", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT),
+ EqualsLexerToken(">=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("3", Lexer::TokenType::TEXT),
+ EqualsLexerToken("<=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("4", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("5", Lexer::TokenType::TEXT),
+ EqualsLexerToken("==", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("6", Lexer::TokenType::TEXT),
+ EqualsLexerToken("<", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("7", Lexer::TokenType::TEXT),
+ EqualsLexerToken("<=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("8", Lexer::TokenType::TEXT),
+ EqualsLexerToken("!=", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("9", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, ComplexScoring) {
+ std::unique_ptr<Lexer> lexer = std::make_unique<Lexer>(
+ "1/log( (CreationTimestamp(document) + LastUsedTimestamp(document)) / 2 "
+ ") * pow(2.3, DocumentScore())",
+ Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(
+ EqualsLexerToken("1", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::DIV),
+ EqualsLexerToken("log", Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("CreationTimestamp",
+ Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("document", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::PLUS),
+ EqualsLexerToken("LastUsedTimestamp",
+ Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("document", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::DIV),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::TIMES),
+ EqualsLexerToken("pow", Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken("2", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::DOT),
+ EqualsLexerToken("3", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::COMMA),
+ EqualsLexerToken("DocumentScore", Lexer::TokenType::FUNCTION_NAME),
+ EqualsLexerToken(Lexer::TokenType::LPAREN),
+ EqualsLexerToken(Lexer::TokenType::RPAREN),
+ EqualsLexerToken(Lexer::TokenType::RPAREN)));
+}
+
+// foo:bar:baz is considered an invalid query as proposed in
+// http://go/appsearch-advanced-query-impl-plan#bookmark=id.yoeyepokmbc5 ; this
+// ensures that the lexer consistently tokenizes colons independently.
+TEST(LexerTest, NoAmbiguousTokenizing) {
+ // This is an invalid query; the lexer doesn't treat `bar:baz` as one token.
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo:bar:baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> invalidQueryTokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(invalidQueryTokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("baz", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo:\"bar:baz\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> validQueryTokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(
+ validQueryTokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar:baz", Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, WhiteSpacesDoNotAffectColonTokenization) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("a:b c : d e: f g :h", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("a", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("b", Lexer::TokenType::TEXT),
+ EqualsLexerToken("c", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("d", Lexer::TokenType::TEXT),
+ EqualsLexerToken("e", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("f", Lexer::TokenType::TEXT),
+ EqualsLexerToken("g", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("h", Lexer::TokenType::TEXT)));
+}
+
+// For the "bar:baz" part to be treated as a TEXT token in a query like
+// foo:bar:baz, an explicit escape is required, so use foo:bar\:baz instead.
+TEST(LexerTest, ColonInTextRequiresExplicitEscaping) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo:bar\\:baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar:baz", Lexer::TokenType::TEXT)));
+}
+
+TEST(LexerTest, QueryShouldRejectTokensBeyondLimit) {
+ std::string query;
+ for (int i = 0; i < Lexer::kMaxNumTokens + 1; ++i) {
+ query.push_back('(');
+ }
+ Lexer lexer(query, Lexer::Language::QUERY);
+ EXPECT_THAT(lexer.ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(LexerTest, ScoringShouldRejectTokensBeyondLimit) {
+ std::string scoring;
+ for (int i = 0; i < Lexer::kMaxNumTokens + 1; ++i) {
+ scoring.push_back('(');
+ }
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ EXPECT_THAT(lexer.ExtractTokens(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/param.h b/icing/query/advanced_query_parser/param.h
new file mode 100644
index 0000000..69c46be
--- /dev/null
+++ b/icing/query/advanced_query_parser/param.h
@@ -0,0 +1,57 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class Cardinality {
+ kRequired,
+ kOptional,
+ kVariable,
+};
+
+struct Param {
+ explicit Param(DataType data_type,
+ Cardinality cardinality = Cardinality::kRequired)
+ : data_type(data_type), cardinality(cardinality) {}
+
+ libtextclassifier3::Status Matches(PendingValue& arg) const {
+ bool matches = arg.data_type() == data_type;
+ // Values of type kText could also potentially be valid kLong values. If
+ // we're expecting a kLong and we have a kText, try to parse it as a kLong.
+ if (!matches && data_type == DataType::kLong &&
+ arg.data_type() == DataType::kText) {
+ ICING_RETURN_IF_ERROR(arg.ParseInt());
+ matches = true;
+ }
+ return matches ? libtextclassifier3::Status::OK
+ : absl_ports::InvalidArgumentError(
+ "Provided arg doesn't match required param type.");
+ }
+
+ DataType data_type;
+ Cardinality cardinality;
+};
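+
+// Illustrative note: Param(DataType::kLong).Matches(arg) accepts an arg of
+// type kLong directly, and also an arg of type kText whose content parses as
+// an integer (via PendingValue::ParseInt()).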
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PARAM_H_
diff --git a/icing/query/advanced_query_parser/parser.cc b/icing/query/advanced_query_parser/parser.cc
new file mode 100644
index 0000000..82576a1
--- /dev/null
+++ b/icing/query/advanced_query_parser/parser.cc
@@ -0,0 +1,449 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/parser.h"
+
+#include <memory>
+#include <string_view>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
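+// Returns nullptr when `operands` is empty, the sole operand when there is
+// exactly one, and an NaryOperatorNode wrapping all operands otherwise.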
+std::unique_ptr<Node> CreateNaryNode(
+ std::string_view operator_text,
+ std::vector<std::unique_ptr<Node>>&& operands) {
+ if (operands.empty()) {
+ return nullptr;
+ }
+ if (operands.size() == 1) {
+ return std::move(operands.at(0));
+ }
+ return std::make_unique<NaryOperatorNode>(std::string(operator_text),
+ std::move(operands));
+}
+
+} // namespace
+
+libtextclassifier3::Status Parser::Consume(Lexer::TokenType token_type) {
+ if (!Match(token_type)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Unable to consume token %d.", static_cast<int>(token_type)));
+ }
+ ++current_token_;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> Parser::ConsumeText() {
+ if (!Match(Lexer::TokenType::TEXT)) {
+ return absl_ports::InvalidArgumentError("Unable to consume token as TEXT.");
+ }
+ auto text_node = std::make_unique<TextNode>(std::move(current_token_->text),
+ current_token_->original_text);
+ ++current_token_;
+ return text_node;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<FunctionNameNode>>
+Parser::ConsumeFunctionName() {
+ if (!Match(Lexer::TokenType::FUNCTION_NAME)) {
+ return absl_ports::InvalidArgumentError(
+ "Unable to consume token as FUNCTION_NAME.");
+ }
+ auto function_name_node =
+ std::make_unique<FunctionNameNode>(std::move(current_token_->text));
+ ++current_token_;
+ return function_name_node;
+}
+
+// stringElement
+// : STRING STAR?
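+// For example, the query segment "bar"* produces a StringNode marked as a
+// prefix.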
+libtextclassifier3::StatusOr<std::unique_ptr<StringNode>>
+Parser::ConsumeStringElement() {
+ if (!Match(Lexer::TokenType::STRING)) {
+ return absl_ports::InvalidArgumentError(
+ "Unable to consume token as STRING.");
+ }
+ std::string text = std::move(current_token_->text);
+ std::string_view raw_text = current_token_->original_text;
+ ++current_token_;
+
+ bool is_prefix = false;
+ if (Match(Lexer::TokenType::STAR)) {
+ is_prefix = true;
+ ++current_token_;
+ }
+
+ return std::make_unique<StringNode>(std::move(text), raw_text, is_prefix);
+}
+
+libtextclassifier3::StatusOr<std::string> Parser::ConsumeComparator() {
+ if (!Match(Lexer::TokenType::COMPARATOR)) {
+ return absl_ports::InvalidArgumentError(
+ "Unable to consume token as COMPARATOR.");
+ }
+ std::string comparator = std::move(current_token_->text);
+ ++current_token_;
+ return comparator;
+}
+
+// member
+// : TEXT (DOT TEXT)* (DOT function)?
+// | TEXT STAR
+// ;
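+// For example, "sender.name" yields a member node with two TEXT children,
+// while "foo*" yields a member node with a single prefix TEXT child.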
+libtextclassifier3::StatusOr<std::unique_ptr<MemberNode>>
+Parser::ConsumeMember() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<TextNode> text_node, ConsumeText());
+ std::vector<std::unique_ptr<TextNode>> children;
+
+  // A member could be either `TEXT (DOT TEXT)* (DOT function)?` or `TEXT STAR`
+  // at this point, so check for 'STAR' to differentiate the two cases.
+ if (Match(Lexer::TokenType::STAR)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::STAR));
+ std::string_view raw_text = text_node->raw_value();
+ std::string text = std::move(*text_node).value();
+ text_node = std::make_unique<TextNode>(std::move(text), raw_text,
+ /*is_prefix=*/true);
+ children.push_back(std::move(text_node));
+ } else {
+ children.push_back(std::move(text_node));
+ while (Match(Lexer::TokenType::DOT)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::DOT));
+ if (MatchFunction()) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<FunctionNode> function_node,
+ ConsumeFunction());
+ // Once a function is matched, we should exit the current rule based on
+ // the grammar.
+ return std::make_unique<MemberNode>(std::move(children),
+ std::move(function_node));
+ }
+ ICING_ASSIGN_OR_RETURN(text_node, ConsumeText());
+ children.push_back(std::move(text_node));
+ }
+ }
+ return std::make_unique<MemberNode>(std::move(children),
+ /*function=*/nullptr);
+}
+
+// function
+// : FUNCTION_NAME LPAREN argList? RPAREN
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<FunctionNode>>
+Parser::ConsumeFunction() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<FunctionNameNode> function_name,
+ ConsumeFunctionName());
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::LPAREN));
+
+ std::vector<std::unique_ptr<Node>> args;
+ if (Match(Lexer::TokenType::RPAREN)) {
+    // Got an empty argument list.
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::RPAREN));
+ } else {
+ ICING_ASSIGN_OR_RETURN(args, ConsumeArgs());
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::RPAREN));
+ }
+ return std::make_unique<FunctionNode>(std::move(function_name),
+ std::move(args));
+}
+
+// comparable
+// : stringElement
+// | member
+// | function
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>>
+Parser::ConsumeComparable() {
+ if (Match(Lexer::TokenType::STRING)) {
+ return ConsumeStringElement();
+ } else if (MatchMember()) {
+ return ConsumeMember();
+ }
+  // The current token sequence isn't a STRING or a member, so the only
+  // remaining alternative is a function; ConsumeFunction() will return an
+  // INVALID_ARGUMENT error if it isn't one.
+ return ConsumeFunction();
+}
+
+// composite
+// : LPAREN expression RPAREN
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeComposite() {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::LPAREN));
+
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> expression, ConsumeExpression());
+
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::RPAREN));
+ return expression;
+}
+
+// argList
+// : expression (COMMA expression)*
+// ;
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<Node>>>
+Parser::ConsumeArgs() {
+ std::vector<std::unique_ptr<Node>> args;
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> arg, ConsumeExpression());
+ args.push_back(std::move(arg));
+ while (Match(Lexer::TokenType::COMMA)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::COMMA));
+ ICING_ASSIGN_OR_RETURN(arg, ConsumeExpression());
+ args.push_back(std::move(arg));
+ }
+ return args;
+}
+
+// restriction
+// : comparable (COMPARATOR MINUS? (comparable | composite))?
+// ;
+// COMPARATOR tokens are not produced by the scoring lexer.
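+// For example, "subject:foo" produces an n-ary ':' node whose two children
+// are the 'subject' member and the 'foo' member.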
+libtextclassifier3::StatusOr<std::unique_ptr<Node>>
+Parser::ConsumeRestriction() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> comparable, ConsumeComparable());
+
+ if (!Match(Lexer::TokenType::COMPARATOR)) {
+ return comparable;
+ }
+ ICING_ASSIGN_OR_RETURN(std::string operator_text, ConsumeComparator());
+
+ bool has_minus = Match(Lexer::TokenType::MINUS);
+ if (has_minus) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
+ }
+
+ std::unique_ptr<Node> arg;
+ if (MatchComposite()) {
+ ICING_ASSIGN_OR_RETURN(arg, ConsumeComposite());
+ } else if (MatchComparable()) {
+ ICING_ASSIGN_OR_RETURN(arg, ConsumeComparable());
+ } else {
+ return absl_ports::InvalidArgumentError(
+ "ARG: must begin with LPAREN or FIRST(comparable)");
+ }
+
+ if (has_minus) {
+ arg = std::make_unique<UnaryOperatorNode>("MINUS", std::move(arg));
+ }
+
+ std::vector<std::unique_ptr<Node>> args;
+ args.push_back(std::move(comparable));
+ args.push_back(std::move(arg));
+ return std::make_unique<NaryOperatorNode>(std::move(operator_text),
+ std::move(args));
+}
+
+// simple
+// : restriction
+// | composite
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeSimple() {
+ if (MatchComposite()) {
+ return ConsumeComposite();
+ } else if (MatchRestriction()) {
+ return ConsumeRestriction();
+ }
+ return absl_ports::InvalidArgumentError(
+ "SIMPLE: must be a restriction or composite");
+}
+
+// term
+// : NOT? simple
+// | MINUS simple
+// ;
+// NOT tokens are not produced by the scoring lexer.
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeTerm() {
+ if (!Match(Lexer::TokenType::NOT) && !Match(Lexer::TokenType::MINUS)) {
+ return ConsumeSimple();
+ }
+ std::string operator_text;
+ if (language_ == Lexer::Language::SCORING) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
+ operator_text = "MINUS";
+ } else {
+ if (Match(Lexer::TokenType::NOT)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::NOT));
+ operator_text = "NOT";
+ } else {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
+ operator_text = "MINUS";
+ }
+ }
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> simple, ConsumeSimple());
+ return std::make_unique<UnaryOperatorNode>(operator_text, std::move(simple));
+}
+
+// factor
+// : term (OR term)*
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeFactor() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> term, ConsumeTerm());
+ std::vector<std::unique_ptr<Node>> terms;
+ terms.push_back(std::move(term));
+
+ while (Match(Lexer::TokenType::OR)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::OR));
+ ICING_ASSIGN_OR_RETURN(term, ConsumeTerm());
+ terms.push_back(std::move(term));
+ }
+
+ return CreateNaryNode("OR", std::move(terms));
+}
+
+// sequence
+// : (factor)+
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeSequence() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> factor, ConsumeFactor());
+ std::vector<std::unique_ptr<Node>> factors;
+ factors.push_back(std::move(factor));
+
+ while (MatchFactor()) {
+ ICING_ASSIGN_OR_RETURN(factor, ConsumeFactor());
+ factors.push_back(std::move(factor));
+ }
+
+ return CreateNaryNode("AND", std::move(factors));
+}
+
+// expression
+// : sequence (AND sequence)*
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>>
+Parser::ConsumeQueryExpression() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> sequence, ConsumeSequence());
+ std::vector<std::unique_ptr<Node>> sequences;
+ sequences.push_back(std::move(sequence));
+
+ while (Match(Lexer::TokenType::AND)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::AND));
+ ICING_ASSIGN_OR_RETURN(sequence, ConsumeSequence());
+ sequences.push_back(std::move(sequence));
+ }
+
+ return CreateNaryNode("AND", std::move(sequences));
+}
+
+// multExpr
+// : term ((TIMES | DIV) term)*
+// ;
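+// For example, "1 * 2 / 3" is grouped as DIV(TIMES(1, 2), 3): each run of the
+// same operator is collapsed into a single n-ary node before switching to the
+// other operator.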
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeMultExpr() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> node, ConsumeTerm());
+ std::vector<std::unique_ptr<Node>> stack;
+ stack.push_back(std::move(node));
+
+ while (Match(Lexer::TokenType::TIMES) || Match(Lexer::TokenType::DIV)) {
+ while (Match(Lexer::TokenType::TIMES)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::TIMES));
+ ICING_ASSIGN_OR_RETURN(node, ConsumeTerm());
+ stack.push_back(std::move(node));
+ }
+ node = CreateNaryNode("TIMES", std::move(stack));
+ stack.clear();
+ stack.push_back(std::move(node));
+
+ while (Match(Lexer::TokenType::DIV)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::DIV));
+ ICING_ASSIGN_OR_RETURN(node, ConsumeTerm());
+ stack.push_back(std::move(node));
+ }
+ node = CreateNaryNode("DIV", std::move(stack));
+ stack.clear();
+ stack.push_back(std::move(node));
+ }
+
+ return std::move(stack[0]);
+}
+
+// expression
+// : multExpr ((PLUS | MINUS) multExpr)*
+// ;
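+// For example, "1 + 2 - 3" is grouped as MINUS(PLUS(1, 2), 3), mirroring the
+// TIMES/DIV grouping in ConsumeMultExpr().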
+libtextclassifier3::StatusOr<std::unique_ptr<Node>>
+Parser::ConsumeScoringExpression() {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> node, ConsumeMultExpr());
+ std::vector<std::unique_ptr<Node>> stack;
+ stack.push_back(std::move(node));
+
+ while (Match(Lexer::TokenType::PLUS) || Match(Lexer::TokenType::MINUS)) {
+ while (Match(Lexer::TokenType::PLUS)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::PLUS));
+ ICING_ASSIGN_OR_RETURN(node, ConsumeMultExpr());
+ stack.push_back(std::move(node));
+ }
+ node = CreateNaryNode("PLUS", std::move(stack));
+ stack.clear();
+ stack.push_back(std::move(node));
+
+ while (Match(Lexer::TokenType::MINUS)) {
+ ICING_RETURN_IF_ERROR(Consume(Lexer::TokenType::MINUS));
+ ICING_ASSIGN_OR_RETURN(node, ConsumeMultExpr());
+ stack.push_back(std::move(node));
+ }
+ node = CreateNaryNode("MINUS", std::move(stack));
+ stack.clear();
+ stack.push_back(std::move(node));
+ }
+
+ return std::move(stack[0]);
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<Node>>
+Parser::ConsumeExpression() {
+ switch (language_) {
+ case Lexer::Language::QUERY:
+ return ConsumeQueryExpression();
+ case Lexer::Language::SCORING:
+ return ConsumeScoringExpression();
+ }
+}
+
+// query
+// : expression? EOF
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeQuery() {
+ language_ = Lexer::Language::QUERY;
+ std::unique_ptr<Node> node;
+ if (current_token_ != lexer_tokens_.end()) {
+ ICING_ASSIGN_OR_RETURN(node, ConsumeExpression());
+ }
+ if (current_token_ != lexer_tokens_.end()) {
+ return absl_ports::InvalidArgumentError(
+ "Error parsing Query. Must reach EOF after parsing Expression!");
+ }
+ return node;
+}
+
+// scoring
+// : expression EOF
+// ;
+libtextclassifier3::StatusOr<std::unique_ptr<Node>> Parser::ConsumeScoring() {
+ language_ = Lexer::Language::SCORING;
+ std::unique_ptr<Node> node;
+ if (current_token_ == lexer_tokens_.end()) {
+ return absl_ports::InvalidArgumentError("Got empty scoring expression!");
+ }
+ ICING_ASSIGN_OR_RETURN(node, ConsumeExpression());
+ if (current_token_ != lexer_tokens_.end()) {
+ return absl_ports::InvalidArgumentError(
+ "Error parsing the scoring expression. Must reach EOF after parsing "
+ "Expression!");
+ }
+ return node;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/parser.h b/icing/query/advanced_query_parser/parser.h
new file mode 100644
index 0000000..a48c562
--- /dev/null
+++ b/icing/query/advanced_query_parser/parser.h
@@ -0,0 +1,141 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
+
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+
+namespace icing {
+namespace lib {
+
+class Parser {
+ public:
+ static Parser Create(std::vector<Lexer::LexerToken>&& lexer_tokens) {
+ return Parser(std::move(lexer_tokens));
+ }
+
+ // Returns:
+ // On success, pointer to the root node of the AST
+ // INVALID_ARGUMENT for input that does not conform to the grammar
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQuery();
+
+ // Returns:
+ // On success, pointer to the root node of the AST
+ // INVALID_ARGUMENT for input that does not conform to the grammar
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeScoring();
+
+ private:
+ explicit Parser(std::vector<Lexer::LexerToken>&& lexer_tokens)
+ : lexer_tokens_(std::move(lexer_tokens)),
+ current_token_(lexer_tokens_.begin()) {}
+
+ // Match Functions
+  // These functions are used to test whether current_token_ matches a member
+ // of the FIRST set of a particular symbol in our grammar.
+ bool Match(Lexer::TokenType token_type) const {
+ return current_token_ != lexer_tokens_.end() &&
+ current_token_->type == token_type;
+ }
+
+ bool MatchMember() const { return Match(Lexer::TokenType::TEXT); }
+
+ bool MatchFunction() const { return Match(Lexer::TokenType::FUNCTION_NAME); }
+
+ bool MatchComparable() const {
+ return Match(Lexer::TokenType::STRING) || MatchMember() || MatchFunction();
+ }
+
+ bool MatchComposite() const { return Match(Lexer::TokenType::LPAREN); }
+
+ bool MatchRestriction() const { return MatchComparable(); }
+
+ bool MatchSimple() const { return MatchRestriction() || MatchComposite(); }
+
+ bool MatchTerm() const {
+ return MatchSimple() || Match(Lexer::TokenType::NOT) ||
+ Match(Lexer::TokenType::MINUS);
+ }
+
+ bool MatchFactor() const { return MatchTerm(); }
+
+ // Consume Functions
+ // These functions attempt to parse the token sequence starting at
+ // current_token_.
+ // Returns INVALID_ARGUMENT if unable to parse the token sequence starting at
+ // current_token_ as that particular grammar symbol. There are no guarantees
+  // about what state current_token_ and lexer_tokens_ are in when returning an
+ // error.
+ //
+ // Consume functions for terminal symbols. These are the only Consume
+ // functions that will directly modify current_token_.
+ // The Consume functions for terminals will guarantee not to modify
+ // current_token_ and lexer_tokens_ when returning an error.
+ libtextclassifier3::Status Consume(Lexer::TokenType token_type);
+
+ libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> ConsumeText();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<FunctionNameNode>>
+ ConsumeFunctionName();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<StringNode>>
+ ConsumeStringElement();
+
+ libtextclassifier3::StatusOr<std::string> ConsumeComparator();
+
+ // Consume functions for non-terminal symbols.
+ libtextclassifier3::StatusOr<std::unique_ptr<MemberNode>> ConsumeMember();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<FunctionNode>> ConsumeFunction();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComparable();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComposite();
+
+ libtextclassifier3::StatusOr<std::vector<std::unique_ptr<Node>>>
+ ConsumeArgs();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeRestriction();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSimple();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeTerm();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeFactor();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSequence();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQueryExpression();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeMultExpr();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>>
+ ConsumeScoringExpression();
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeExpression();
+
+ std::vector<Lexer::LexerToken> lexer_tokens_;
+ std::vector<Lexer::LexerToken>::const_iterator current_token_;
+ Lexer::Language language_ = Lexer::Language::QUERY;
+};
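+
+// A minimal usage sketch (mirroring the integration tests below):
+//
+//   Parser parser = Parser::Create(std::move(lexer_tokens));
+//   ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> root, parser.ConsumeQuery());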
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
diff --git a/icing/query/advanced_query_parser/parser_integration_test.cc b/icing/query/advanced_query_parser/parser_integration_test.cc
new file mode 100644
index 0000000..fa1bd2e
--- /dev/null
+++ b/icing/query/advanced_query_parser/parser_integration_test.cc
@@ -0,0 +1,1012 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::IsNull;
+using ::testing::SizeIs;
+
+TEST(ParserIntegrationTest, EmptyQuery) {
+ std::string query = "";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+ EXPECT_THAT(tree_root, IsNull());
+}
+
+TEST(ParserIntegrationTest, EmptyScoring) {
+ std::string query = "";
+  Lexer lexer(query, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, SingleTerm) {
+ std::string query = "foo";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // member
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserIntegrationTest, ImplicitAnd) {
+ std::string query = "foo bar";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // AND
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, AND }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, Or) {
+ std::string query = "foo OR bar";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, And) {
+ std::string query = "foo AND bar";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // AND
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, AND }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, Not) {
+ std::string query = "NOT foo";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // NOT
+ // |
+ // member
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, NOT }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("NOT", NodeType::kUnaryOperator)));
+}
+
+TEST(ParserIntegrationTest, Minus) {
+ std::string query = "-foo";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // MINUS
+ // |
+ // member
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, MINUS }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator)));
+}
+
+TEST(ParserIntegrationTest, Has) {
+ std::string query = "subject:foo";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // :
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+  // { text, member, text, member, : }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("subject", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo(":", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, HasNested) {
+ std::string query = "sender.name:foo";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // :
+ // / \
+ // member member
+ // / \ |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+  // { text, text, member, text, member, : }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("sender", NodeType::kText),
+ EqualsNodeInfo("name", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo(":", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, EmptyFunction) {
+ std::string query = "foo()";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserIntegrationTest, FunctionSingleArg) {
+ std::string query = "foo(\"bar\")";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // / \
+ // function_name string
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, string, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserIntegrationTest, FunctionMultiArg) {
+ std::string query = "foo(\"bar\", \"baz\")";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // / | \
+ // function_name string string
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, string, string, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kString),
+ EqualsNodeInfo("baz", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserIntegrationTest, FunctionNested) {
+ std::string query = "foo(bar())";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // / \
+ // function_name function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, function_name, function, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserIntegrationTest, FunctionWithTrailingSequence) {
+ std::string query = "foo() OR bar";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // function member
+ // | |
+ // function_name text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, function, text, member, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, Composite) {
+ std::string query = "foo OR (bar baz)";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // member AND
+ // | / \
+ // text member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, text, member, AND, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, CompositeWithTrailingSequence) {
+ std::string query = "(bar baz) OR foo";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // AND member
+ // / \ |
+ // member member text
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, AND, text, member, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator),
+ EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, Complex) {
+ std::string query = "foo bar:baz OR pal(\"bat\")";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // AND
+ // / \
+ // member OR
+ // | / \
+ // text : function
+ // / \ / \
+ // member member function_name string
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, text, member, :, function_name, string,
+ // function, OR, AND }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo(":", NodeType::kNaryOperator),
+ EqualsNodeInfo("pal", NodeType::kFunctionName),
+ EqualsNodeInfo("bat", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, InvalidHas) {
+ std::string query = "foo:"; // No right hand operand to :
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidComposite) {
+ std::string query = "(foo bar"; // No terminating RPAREN
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidMember) {
+ std::string query = "foo."; // DOT must have succeeding TEXT
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidOr) {
+ std::string query = "foo OR"; // No right hand operand to OR
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidAnd) {
+ std::string query = "foo AND"; // No right hand operand to AND
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidNot) {
+ std::string query = "NOT"; // No right hand operand to NOT
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidMinus) {
+ std::string query = "-"; // No right hand operand to -
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidFunctionCallNoRparen) {
+ std::string query = "foo("; // No terminating RPAREN
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, InvalidFunctionArgsHangingComma) {
+ std::string query = "foo(\"bar\",)"; // no valid arg following COMMA
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserIntegrationTest, ScoringPlus) {
+ std::string scoring = "1 + 1 + 1";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // PLUS
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ScoringMinus) {
+ std::string scoring = "1 - 1 - 1";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // MINUS
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ScoringUnaryMinus) {
+ std::string scoring = "1 + -1 + 1";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // PLUS
+ // / | \
+ // member MINUS member
+ // | | |
+ // text member text
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ScoringPlusMinus) {
+ std::string scoring = "11 + 12 - 13 + 14";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // PLUS
+ // / \
+ // MINUS member
+ // / \ |
+ // PLUS member text
+ // / \ |
+ // member member text
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("11", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("12", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator),
+ EqualsNodeInfo("13", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kNaryOperator),
+ EqualsNodeInfo("14", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ScoringTimes) {
+ std::string scoring = "1 * 1 * 1";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // TIMES
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("TIMES", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ScoringDiv) {
+ std::string scoring = "1 / 1 / 1";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // DIV
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ScoringTimesDiv) {
+ std::string scoring = "11 / 12 * 13 / 14 / 15";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // DIV
+ // / | \
+ // TIMES member member
+ // / \ | |
+ // DIV member text text
+ // / \ |
+ // member member text
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("11", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("12", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator),
+ EqualsNodeInfo("13", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("TIMES", NodeType::kNaryOperator),
+ EqualsNodeInfo("14", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("15", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator)));
+}
+
+TEST(ParserIntegrationTest, ComplexScoring) {
+ // With parentheses in function arguments.
+ std::string scoring = "1 + pow((2 * sin(3)), 4) + -5 / 6";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ std::vector<NodeInfo> node = visitor.nodes();
+ EXPECT_THAT(node,
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("pow", NodeType::kFunctionName),
+ EqualsNodeInfo("2", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("sin", NodeType::kFunctionName),
+ EqualsNodeInfo("3", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("TIMES", NodeType::kNaryOperator),
+ EqualsNodeInfo("4", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("5", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator),
+ EqualsNodeInfo("6", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+
+ // Without parentheses in function arguments.
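+  // Redundant parentheses around a function argument should not change the
+  // AST, so this variant must produce the same node sequence as above.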
+ scoring = "1 + pow(2 * sin(3), 4) + -5 / 6";
+ lexer = Lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(lexer_tokens, lexer.ExtractTokens());
+ parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(tree_root, parser.ConsumeScoring());
+ visitor = SimpleVisitor();
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(), ElementsAreArray(node));
+}
+
+TEST(ParserIntegrationTest, ScoringMemberFunction) {
+ std::string scoring = "this.CreationTimestamp()";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // member
+ // / \
+ // text function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(
+ visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("this", NodeType::kText),
+ EqualsNodeInfo("CreationTimestamp", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserIntegrationTest, QueryMemberFunction) {
+ std::string query = "this.foo()";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // member
+ // / \
+ // text function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("this", NodeType::kText),
+ EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserIntegrationTest, ScoringComplexMemberFunction) {
+ std::string scoring = "a.b.fun(c, d)";
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // member
+ // / | \
+ // text text function
+ // / | \
+ // function_name member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("a", NodeType::kText),
+ EqualsNodeInfo("b", NodeType::kText),
+ EqualsNodeInfo("fun", NodeType::kFunctionName),
+ EqualsNodeInfo("c", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("d", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserIntegrationTest, QueryComplexMemberFunction) {
+ std::string query = "this.abc.fun(def, ghi)";
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // member
+ // / | \
+ // text text function
+ // / | \
+ // function_name member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("this", NodeType::kText),
+ EqualsNodeInfo("abc", NodeType::kText),
+ EqualsNodeInfo("fun", NodeType::kFunctionName),
+ EqualsNodeInfo("def", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("ghi", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserTest, QueryShouldNotStackOverflowAtMaxNumTokens) {
+ // query = "(( ... (foo bar) ... ))"
+ std::string query;
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ query.push_back('(');
+ }
+ query.append("foo bar");
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ query.push_back(')');
+ }
+
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(), IsOk());
+}
+
+TEST(ParserTest, ScoringShouldNotStackOverflowAtMaxNumTokens) {
+ // scoring = "(( ... (-1) ... ))"
+ std::string scoring;
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ scoring.push_back('(');
+ }
+ scoring.append("-1");
+ for (int i = 0; i < Lexer::kMaxNumTokens / 2 - 1; ++i) {
+ scoring.push_back(')');
+ }
+
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(), IsOk());
+}
+
+TEST(ParserTest, InvalidQueryShouldNotStackOverflowAtMaxNumTokens) {
+ std::string query;
+ for (int i = 0; i < Lexer::kMaxNumTokens; ++i) {
+ query.push_back('(');
+ }
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidScoringShouldNotStackOverflowAtMaxNumTokens) {
+ std::string scoring;
+ for (int i = 0; i < Lexer::kMaxNumTokens; ++i) {
+ scoring.push_back('(');
+ }
+ Lexer lexer(scoring, Lexer::Language::SCORING);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ EXPECT_THAT(lexer_tokens, SizeIs(Lexer::kMaxNumTokens));
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/parser_test.cc b/icing/query/advanced_query_parser/parser_test.cc
new file mode 100644
index 0000000..824c2ce
--- /dev/null
+++ b/icing/query/advanced_query_parser/parser_test.cc
@@ -0,0 +1,1087 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/parser.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree-test-utils.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::IsNull;
+
+TEST(ParserTest, EmptyQuery) {
+ std::vector<Lexer::LexerToken> lexer_tokens;
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+ EXPECT_THAT(tree_root, IsNull());
+}
+
+TEST(ParserTest, EmptyScoring) {
+ std::vector<Lexer::LexerToken> lexer_tokens;
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, SingleTerm) {
+ std::string_view query = "foo";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query, Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // member
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserTest, ImplicitAnd) {
+ std::string_view query = "foo bar";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(4, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // AND
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, AND }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, Or) {
+ std::string_view query = "foo OR bar";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR},
+ {"bar", query.substr(7, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, And) {
+ std::string_view query = "foo AND bar";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 3), Lexer::TokenType::AND},
+ {"bar", query.substr(8, 4), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // AND
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, AND }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, Not) {
+ std::string_view query = "NOT foo";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 3), Lexer::TokenType::NOT},
+ {"foo", query.substr(4, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // NOT
+ // |
+ // member
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, NOT }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("NOT", NodeType::kUnaryOperator)));
+}
+
+TEST(ParserTest, Minus) {
+ std::string_view query = "-foo";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 1), Lexer::TokenType::MINUS},
+ {"foo", query.substr(1, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // MINUS
+ // |
+ // member
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, MINUS }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator)));
+}
+
+TEST(ParserTest, Has) {
+ std::string_view query = "subject:foo";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"subject", query.substr(0, 7), Lexer::TokenType::TEXT},
+ {":", query.substr(7, 1), Lexer::TokenType::COMPARATOR},
+ {"foo", query.substr(8, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // :
+ // / \
+ // member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+  // { text, member, text, member, : }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("subject", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo(":", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, HasNested) {
+ std::string_view query = "sender.name:foo";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"sender", query.substr(0, 6), Lexer::TokenType::TEXT},
+ {"", query.substr(6, 1), Lexer::TokenType::DOT},
+ {"name", query.substr(7, 4), Lexer::TokenType::TEXT},
+ {":", query.substr(11, 1), Lexer::TokenType::COMPARATOR},
+ {"foo", query.substr(12, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // :
+ // / \
+ // member member
+ // / \ |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+  // { text, text, member, text, member, : }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("sender", NodeType::kText),
+ EqualsNodeInfo("name", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo(":", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, EmptyFunction) {
+ std::string_view query = "foo()";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(4, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserTest, FunctionSingleArg) {
+ std::string_view query = "foo(\"bar\")";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // / \
+ // function_name string
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, string, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserTest, FunctionMultiArg) {
+ std::string_view query = "foo(\"bar\", \"baz\")";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(9, 1), Lexer::TokenType::COMMA},
+ {"baz", query.substr(12, 3), Lexer::TokenType::STRING},
+ {"", query.substr(16, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // / | \
+ // function_name string string
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, string, string, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kString),
+ EqualsNodeInfo("baz", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserTest, FunctionNested) {
+ std::string_view query = "foo(bar())";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(9, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // function
+ // / \
+ // function_name function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, function_name, function, function }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("bar", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kFunction)));
+}
+
+TEST(ParserTest, FunctionWithTrailingSequence) {
+ std::string_view query = "foo() OR bar";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(4, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(6, 2), Lexer::TokenType::OR},
+ {"bar", query.substr(9, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // function member
+ // | |
+ // function_name text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { function_name, function, text, member, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, Composite) {
+ std::string_view query = "foo OR (bar baz)";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 2), Lexer::TokenType::OR},
+ {"", query.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(8, 3), Lexer::TokenType::TEXT},
+ {"baz", query.substr(12, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(15, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // member AND
+ // | / \
+ // text member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, text, member, AND, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, CompositeWithTrailingSequence) {
+ std::string_view query = "(bar baz) OR foo";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(1, 3), Lexer::TokenType::TEXT},
+ {"baz", query.substr(5, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(8, 1), Lexer::TokenType::RPAREN},
+ {"", query.substr(10, 2), Lexer::TokenType::OR},
+ {"foo", query.substr(13, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // OR
+ // / \
+ // AND member
+ // / \ |
+ // member member text
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, AND, text, member, OR }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator),
+ EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, Complex) {
+ std::string_view query = R"(foo bar:baz OR pal("bat"))";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(4, 3), Lexer::TokenType::TEXT},
+ {":", query.substr(7, 1), Lexer::TokenType::COMPARATOR},
+ {"baz", query.substr(8, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(12, 2), Lexer::TokenType::OR},
+ {"pal", query.substr(15, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(18, 1), Lexer::TokenType::LPAREN},
+ {"bat", query.substr(20, 3), Lexer::TokenType::STRING},
+ {"", query.substr(24, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // AND
+ // / \
+ // member OR
+ // | / \
+ // text : function
+ // / \ / \
+ // member member function_name string
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ // SimpleVisitor ordering
+ // { text, member, text, member, text, member, :, function_name, string,
+ // function, OR, AND }
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("foo", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("bar", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("baz", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo(":", NodeType::kNaryOperator),
+ EqualsNodeInfo("pal", NodeType::kFunctionName),
+ EqualsNodeInfo("bat", NodeType::kString),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("OR", NodeType::kNaryOperator),
+ EqualsNodeInfo("AND", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, InvalidHas) {
+ std::string_view query = "foo:"; // No right hand operand to :
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {":", query.substr(3, 1), Lexer::TokenType::COMPARATOR}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidComposite) {
+ std::string_view query = "(foo bar"; // No terminating RPAREN
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 1), Lexer::TokenType::LPAREN},
+ {"foo", query.substr(1, 3), Lexer::TokenType::TEXT},
+ {"bar", query.substr(5, 3), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidMember) {
+ std::string_view query = "foo."; // DOT must have succeeding TEXT
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(3, 1), Lexer::TokenType::DOT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidOr) {
+ std::string_view query = "foo OR"; // No right hand operand to OR
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(3, 2), Lexer::TokenType::OR}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidAnd) {
+ std::string_view query = "foo AND"; // No right hand operand to AND
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 3), Lexer::TokenType::AND}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidNot) {
+ std::string_view query = "NOT"; // No right hand operand to NOT
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 3), Lexer::TokenType::NOT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidMinus) {
+ std::string_view query = "-"; // No right hand operand to -
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"", query.substr(0, 1), Lexer::TokenType::MINUS}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidFunctionCallNoRparen) {
+ std::string_view query = "foo("; // No terminating RPAREN
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 0), Lexer::TokenType::LPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidFunctionCallNoLparen) {
+ std::string_view query =
+ "foo bar"; // foo labeled FUNCTION_NAME despite no LPAREN
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"bar", query.substr(4, 3), Lexer::TokenType::FUNCTION_NAME}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, InvalidFunctionArgsHangingComma) {
+ std::string_view query = R"(foo("bar",))"; // no valid arg following COMMA
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"foo", query.substr(0, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(3, 1), Lexer::TokenType::LPAREN},
+ {"bar", query.substr(5, 3), Lexer::TokenType::STRING},
+ {"", query.substr(9, 1), Lexer::TokenType::COMMA},
+ {"", query.substr(10, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeQuery(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ParserTest, ScoringPlus) {
+ std::string_view scoring_exp = "1 + 1 + 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // PLUS
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ScoringMinus) {
+ std::string_view scoring_exp = "1 - 1 - 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // MINUS
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ScoringUnaryMinus) {
+ std::string_view scoring_exp = "1 + -1 + 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(4, 1), Lexer::TokenType::MINUS},
+ {"1", scoring_exp.substr(5, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::PLUS},
+ {"1", scoring_exp.substr(9, 1), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // PLUS
+ // / | \
+ // member MINUS member
+ // | | |
+ // text member text
+ // |
+ // text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ScoringPlusMinus) {
+ std::string_view scoring_exp = "11 + 12 - 13 + 14";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"11", scoring_exp.substr(0, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::PLUS},
+ {"12", scoring_exp.substr(5, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::MINUS},
+ {"13", scoring_exp.substr(8, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(11, 1), Lexer::TokenType::PLUS},
+ {"14", scoring_exp.substr(13, 2), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // PLUS
+ // / \
+ // MINUS member
+ // / \ |
+ // PLUS member text
+ // / \ |
+ // member member text
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("11", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("12", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator),
+ EqualsNodeInfo("13", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kNaryOperator),
+ EqualsNodeInfo("14", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ScoringTimes) {
+ std::string_view scoring_exp = "1 * 1 * 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::TIMES},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::TIMES},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // TIMES
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("TIMES", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ScoringDiv) {
+ std::string_view scoring_exp = "1 / 1 / 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::DIV},
+ {"1", scoring_exp.substr(4, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(6, 1), Lexer::TokenType::DIV},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // DIV
+ // / | \
+ // member member member
+ // | | |
+ // text text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ScoringTimesDiv) {
+ std::string_view scoring_exp = "11 / 12 * 13 / 14 / 15";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"11", scoring_exp.substr(0, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::DIV},
+ {"12", scoring_exp.substr(5, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::TIMES},
+ {"13", scoring_exp.substr(10, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(13, 1), Lexer::TokenType::DIV},
+ {"14", scoring_exp.substr(15, 2), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::DIV},
+ {"15", scoring_exp.substr(20, 2), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // DIV
+ // / | \
+ // TIMES member member
+ // / \ | |
+ // DIV member text text
+ // / \ |
+ // member member text
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("11", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("12", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator),
+ EqualsNodeInfo("13", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("TIMES", NodeType::kNaryOperator),
+ EqualsNodeInfo("14", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("15", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator)));
+}
+
+TEST(ParserTest, ComplexScoring) {
+ std::string_view scoring_exp = "1 + pow((2 * sin(3)), 4) + -5 / 6";
+ // With parentheses in function arguments.
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"pow", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"", scoring_exp.substr(8, 1), Lexer::TokenType::LPAREN},
+ {"2", scoring_exp.substr(9, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(11, 1), Lexer::TokenType::TIMES},
+ {"sin", scoring_exp.substr(13, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(16, 1), Lexer::TokenType::LPAREN},
+ {"3", scoring_exp.substr(17, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(19, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(20, 1), Lexer::TokenType::COMMA},
+ {"4", scoring_exp.substr(22, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(25, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(27, 1), Lexer::TokenType::MINUS},
+ {"5", scoring_exp.substr(28, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(30, 1), Lexer::TokenType::DIV},
+ {"6", scoring_exp.substr(32, 1), Lexer::TokenType::TEXT},
+ };
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ std::vector<NodeInfo> node = visitor.nodes();
+ EXPECT_THAT(node,
+ ElementsAre(EqualsNodeInfo("1", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("pow", NodeType::kFunctionName),
+ EqualsNodeInfo("2", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("sin", NodeType::kFunctionName),
+ EqualsNodeInfo("3", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("TIMES", NodeType::kNaryOperator),
+ EqualsNodeInfo("4", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("5", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("MINUS", NodeType::kUnaryOperator),
+ EqualsNodeInfo("6", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("DIV", NodeType::kNaryOperator),
+ EqualsNodeInfo("PLUS", NodeType::kNaryOperator)));
+
+ scoring_exp = "1 + pow(2 * sin(3), 4) + -5 / 6";
+ // Without parentheses in function arguments.
+ lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"pow", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"2", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(10, 1), Lexer::TokenType::TIMES},
+ {"sin", scoring_exp.substr(12, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(15, 1), Lexer::TokenType::LPAREN},
+ {"3", scoring_exp.substr(16, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(17, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(18, 1), Lexer::TokenType::COMMA},
+ {"4", scoring_exp.substr(20, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(21, 1), Lexer::TokenType::RPAREN},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(25, 1), Lexer::TokenType::MINUS},
+ {"5", scoring_exp.substr(26, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(28, 1), Lexer::TokenType::DIV},
+ {"6", scoring_exp.substr(30, 1), Lexer::TokenType::TEXT},
+ };
+ parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(tree_root, parser.ConsumeScoring());
+ visitor = SimpleVisitor();
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(), ElementsAreArray(node));
+}
+
+TEST(ParserTest, ScoringMemberFunction) {
+ std::string_view scoring_exp = "this.CreationTimestamp()";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"this", scoring_exp.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(4, 1), Lexer::TokenType::DOT},
+ {"CreationTimestamp", scoring_exp.substr(5, 17),
+ Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(22, 1), Lexer::TokenType::LPAREN},
+ {"", scoring_exp.substr(23, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // member
+ // / \
+ // text function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(
+ visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("this", NodeType::kText),
+ EqualsNodeInfo("CreationTimestamp", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserTest, QueryMemberFunction) {
+ std::string_view query = "this.foo()";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"this", query.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 1), Lexer::TokenType::DOT},
+ {"foo", query.substr(5, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(8, 1), Lexer::TokenType::LPAREN},
+ {"", query.substr(9, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // member
+ // / \
+ // text function
+ // |
+ // function_name
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("this", NodeType::kText),
+ EqualsNodeInfo("foo", NodeType::kFunctionName),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserTest, ScoringComplexMemberFunction) {
+ std::string_view scoring_exp = "a.b.fun(c, d)";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"a", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(1, 1), Lexer::TokenType::DOT},
+ {"b", scoring_exp.substr(2, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(3, 1), Lexer::TokenType::DOT},
+ {"fun", scoring_exp.substr(4, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", scoring_exp.substr(7, 1), Lexer::TokenType::LPAREN},
+ {"c", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(9, 1), Lexer::TokenType::COMMA},
+ {"d", scoring_exp.substr(11, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(12, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ // Expected AST:
+ // member
+ // / | \
+ // text text function
+ // / | \
+ // function_name member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("a", NodeType::kText),
+ EqualsNodeInfo("b", NodeType::kText),
+ EqualsNodeInfo("fun", NodeType::kFunctionName),
+ EqualsNodeInfo("c", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("d", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserTest, QueryComplexMemberFunction) {
+ std::string_view query = "this.abc.fun(def, ghi)";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"this", query.substr(0, 4), Lexer::TokenType::TEXT},
+ {"", query.substr(4, 1), Lexer::TokenType::DOT},
+ {"abc", query.substr(5, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(8, 1), Lexer::TokenType::DOT},
+ {"fun", query.substr(9, 3), Lexer::TokenType::FUNCTION_NAME},
+ {"", query.substr(12, 1), Lexer::TokenType::LPAREN},
+ {"def", query.substr(13, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(16, 1), Lexer::TokenType::COMMA},
+ {"ghi", query.substr(17, 3), Lexer::TokenType::TEXT},
+ {"", query.substr(20, 1), Lexer::TokenType::RPAREN}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ // Expected AST:
+ // member
+ // / | \
+ // text text function
+ // / | \
+ // function_name member member
+ // | |
+ // text text
+ SimpleVisitor visitor;
+ tree_root->Accept(&visitor);
+ EXPECT_THAT(visitor.nodes(),
+ ElementsAre(EqualsNodeInfo("this", NodeType::kText),
+ EqualsNodeInfo("abc", NodeType::kText),
+ EqualsNodeInfo("fun", NodeType::kFunctionName),
+ EqualsNodeInfo("def", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("ghi", NodeType::kText),
+ EqualsNodeInfo("", NodeType::kMember),
+ EqualsNodeInfo("", NodeType::kFunction),
+ EqualsNodeInfo("", NodeType::kMember)));
+}
+
+TEST(ParserTest, InvalidScoringToken) {
+ std::string_view scoring_exp = "1 + NOT 1";
+ std::vector<Lexer::LexerToken> lexer_tokens = {
+ {"1", scoring_exp.substr(0, 1), Lexer::TokenType::TEXT},
+ {"", scoring_exp.substr(2, 1), Lexer::TokenType::PLUS},
+ {"", scoring_exp.substr(4, 3), Lexer::TokenType::NOT},
+ {"1", scoring_exp.substr(8, 1), Lexer::TokenType::TEXT}};
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ EXPECT_THAT(parser.ConsumeScoring(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/pending-value.cc b/icing/query/advanced_query_parser/pending-value.cc
new file mode 100644
index 0000000..67bdc3a
--- /dev/null
+++ b/icing/query/advanced_query_parser/pending-value.cc
@@ -0,0 +1,44 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/query/advanced_query_parser/pending-value.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status PendingValue::ParseInt() {
+ if (data_type_ == DataType::kLong) {
+ return libtextclassifier3::Status::OK;
+ } else if (data_type_ != DataType::kText) {
+ return absl_ports::InvalidArgumentError("Cannot parse value as LONG");
+ }
+ if (query_term_.is_prefix_val) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Cannot use prefix operator '*' with numeric value: ",
+ query_term_.term));
+ }
+ char* value_end;
+ long_val_ = std::strtoll(query_term_.term.c_str(), &value_end, /*base=*/10);
+ if (value_end != query_term_.term.c_str() + query_term_.term.length()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Unable to parse \"", query_term_.term, "\" as number."));
+ }
+ data_type_ = DataType::kLong;
+ query_term_ = {/*term=*/"", /*raw_term=*/"", /*is_prefix_val=*/false};
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/pending-value.h b/icing/query/advanced_query_parser/pending-value.h
new file mode 100644
index 0000000..1a6717e
--- /dev/null
+++ b/icing/query/advanced_query_parser/pending-value.h
@@ -0,0 +1,160 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class DataType {
+ kNone,
+ kLong,
+ kText,
+ kString,
+ kStringList,
+ kDocumentIterator,
+};
+
+struct QueryTerm {
+ std::string term;
+ std::string_view raw_term;
+ bool is_prefix_val;
+};
+
+// A holder for intermediate results when processing child nodes.
+struct PendingValue {
+ static PendingValue CreateStringPendingValue(QueryTerm str) {
+ return PendingValue(std::move(str), DataType::kString);
+ }
+
+ static PendingValue CreateTextPendingValue(QueryTerm text) {
+ return PendingValue(std::move(text), DataType::kText);
+ }
+
+ PendingValue() : data_type_(DataType::kNone) {}
+
+ explicit PendingValue(std::unique_ptr<DocHitInfoIterator> iterator)
+ : iterator_(std::move(iterator)),
+ data_type_(DataType::kDocumentIterator) {}
+
+ explicit PendingValue(std::vector<std::string> string_lists)
+ : string_vals_(std::move(string_lists)),
+ data_type_(DataType::kStringList) {}
+
+ PendingValue(const PendingValue&) = delete;
+ PendingValue(PendingValue&&) = default;
+
+ PendingValue& operator=(const PendingValue&) = delete;
+ PendingValue& operator=(PendingValue&&) = default;
+
+ // Placeholder is used to indicate where the children of a particular node
+ // begin.
+ bool is_placeholder() const { return data_type_ == DataType::kNone; }
+
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ iterator() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kDocumentIterator));
+ return std::move(iterator_);
+ }
+
+ libtextclassifier3::StatusOr<const std::vector<std::string>*> string_vals()
+ const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kStringList));
+ return &string_vals_;
+ }
+ libtextclassifier3::StatusOr<std::vector<std::string>> string_vals() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kStringList));
+ return std::move(string_vals_);
+ }
+
+ libtextclassifier3::StatusOr<const QueryTerm*> string_val() const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kString));
+ return &query_term_;
+ }
+ libtextclassifier3::StatusOr<QueryTerm> string_val() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kString));
+ return std::move(query_term_);
+ }
+
+ libtextclassifier3::StatusOr<const QueryTerm*> text_val() const& {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kText));
+ return &query_term_;
+ }
+ libtextclassifier3::StatusOr<QueryTerm> text_val() && {
+ ICING_RETURN_IF_ERROR(CheckDataType(DataType::kText));
+ return std::move(query_term_);
+ }
+
+ libtextclassifier3::StatusOr<int64_t> long_val() {
+ ICING_RETURN_IF_ERROR(ParseInt());
+ return long_val_;
+ }
+
+  // Attempts to interpret the value as an int. A pending value can be parsed
+  // as an int under two circumstances:
+  //   1. It holds a kText value that can be parsed to an int
+  //   2. It already holds a kLong value
+  // If #1 applies, then the parsed value will be stored in long_val_ and
+  // data_type_ will be updated to kLong.
+ // RETURNS:
+ // - OK, if able to successfully parse the value into a long
+ // - INVALID_ARGUMENT if the value could not be parsed as a long
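+  //
+  // Illustrative example: a kText value holding "42" can be consumed as a
+  // long:
+  //   PendingValue v = PendingValue::CreateTextPendingValue(
+  //       {/*term=*/"42", /*raw_term=*/"42", /*is_prefix_val=*/false});
+  //   ICING_ASSIGN_OR_RETURN(int64_t val, v.long_val());  // val == 42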
+ libtextclassifier3::Status ParseInt();
+
+ DataType data_type() const { return data_type_; }
+
+ private:
+ explicit PendingValue(QueryTerm query_term, DataType data_type)
+ : query_term_(std::move(query_term)), data_type_(data_type) {}
+
+ libtextclassifier3::Status CheckDataType(DataType required_data_type) const {
+ if (data_type_ == required_data_type) {
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unable to retrieve value of type '",
+ std::to_string(static_cast<int>(required_data_type)),
+ "' from pending value of type '",
+ std::to_string(static_cast<int>(data_type_)), "'"));
+ }
+
+ // iterator_ will be populated when data_type_ is kDocumentIterator.
+ std::unique_ptr<DocHitInfoIterator> iterator_;
+
+  // string_vals_ will be populated when data_type_ is kStringList.
+ std::vector<std::string> string_vals_;
+
+  // query_term_ will be populated when data_type_ is kString or kText.
+ QueryTerm query_term_;
+
+ // long_val_ will be populated when data_type_ is kLong - after a successful
+ // call to ParseInt.
+ int64_t long_val_;
+ DataType data_type_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_PENDING_VALUE_H_
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
new file mode 100644
index 0000000..31da959
--- /dev/null
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -0,0 +1,963 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/query-visitor.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+#include "icing/index/iterator/doc-hit-info-iterator-none.h"
+#include "icing/index/iterator/doc-hit-info-iterator-not.h"
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-document.h"
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/param.h"
+#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/query/advanced_query_parser/util/string-util.h"
+#include "icing/query/query-features.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/token.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+struct CreateList {
+ libtextclassifier3::StatusOr<PendingValue> operator()(
+ std::vector<PendingValue>&& args) const {
+ std::vector<std::string> values;
+ values.reserve(args.size());
+ for (PendingValue& arg : args) {
+ QueryTerm string_val = std::move(arg).string_val().ValueOrDie();
+ values.push_back(std::move(string_val.term));
+ }
+ return PendingValue(std::move(values));
+ }
+};
+
+bool IsNumericComparator(std::string_view operator_text) {
+ if (operator_text.length() < 1 || operator_text.length() > 2) {
+ return false;
+ }
+ // TODO(tjbarron) decide how/if to support !=
+ return operator_text == "<" || operator_text == ">" ||
+ operator_text == "==" || operator_text == "<=" ||
+ operator_text == ">=";
+}
+
+bool IsSupportedNaryOperator(std::string_view operator_text) {
+ return IsNumericComparator(operator_text) || operator_text == "AND" ||
+ operator_text == "OR" || operator_text == ":";
+}
+
+struct Int64Range {
+ int64_t low;
+ int64_t high;
+};
+
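+// Computes the inclusive range [low, high] implied by applying operator_text
+// to int_value. Illustrative examples: ("<", 5) yields [INT64_MIN, 4];
+// ("==", 5) yields [5, 5]; (">", 5) yields [6, INT64_MAX].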
+libtextclassifier3::StatusOr<Int64Range> GetInt64Range(
+ std::string_view operator_text, int64_t int_value) {
+ Int64Range range = {std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max()};
+ if (operator_text == "<") {
+ if (int_value == std::numeric_limits<int64_t>::min()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot specify < INT64_MIN in query expression.");
+ }
+ range.high = int_value - 1;
+ } else if (operator_text == "<=") {
+ range.high = int_value;
+ } else if (operator_text == "==") {
+ range.high = int_value;
+ range.low = int_value;
+ } else if (operator_text == ">=") {
+ range.low = int_value;
+ } else if (operator_text == ">") {
+ if (int_value == std::numeric_limits<int64_t>::max()) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot specify > INT64_MAX in query expression.");
+ }
+ range.low = int_value + 1;
+ }
+ return range;
+}
+
+} // namespace
+
+void QueryVisitor::PendingPropertyRestricts::AddValidRestricts(
+ std::set<std::string> new_restricts) {
+ if (!has_active_property_restricts()) {
+ pending_property_restricts_.push_back(std::move(new_restricts));
+ return;
+ }
+
+ // There is an active property restrict already in effect. To determine the
+ // updated active property restrict being applied at this level, we need to
+ // calculate the intersection of new_restricts and
+ // active_property_restricts.
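+  // Illustrative example: if the active restricts are {"body", "subject"} and
+  // new_restricts is {"body", "title"}, the set pushed below is the
+  // intersection {"body"}.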
+ const std::set<std::string>& active_restricts = active_property_restricts();
+ auto active_restricts_itr = active_restricts.begin();
+ for (auto new_restricts_itr = new_restricts.begin();
+ new_restricts_itr != new_restricts.end();) {
+ while (active_restricts_itr != active_restricts.end() &&
+ *active_restricts_itr < *new_restricts_itr) {
+      // active_restricts_itr is behind new_restricts_itr.
+ ++active_restricts_itr;
+ }
+ if (active_restricts_itr == active_restricts.end()) {
+      // There's nothing left in active restricts. Everything at
+      // new_restricts_itr and beyond should be removed.
+ new_restricts_itr =
+ new_restricts.erase(new_restricts_itr, new_restricts.end());
+ } else if (*active_restricts_itr > *new_restricts_itr) {
+      // new_restricts_itr points to an element not present in
+      // active_restricts.
+ new_restricts_itr = new_restricts.erase(new_restricts_itr);
+ } else {
+      // The element that new_restricts_itr points to is present in
+      // active_restricts.
+ ++new_restricts_itr;
+ }
+ }
+ pending_property_restricts_.push_back(std::move(new_restricts));
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryVisitor::CreateTermIterator(const QueryTerm& query_term) {
+ if (query_term.is_prefix_val) {
+ // '*' prefix operator was added in list filters
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
+ TermMatchType::Code match_type = GetTermMatchType(query_term.is_prefix_val);
+ int unnormalized_term_start =
+ query_term.raw_term.data() - raw_query_text_.data();
+ if (!processing_not_) {
+ // 1. Add term to property_query_terms_map
+ if (pending_property_restricts_.has_active_property_restricts()) {
+ for (const std::string& property_restrict :
+ pending_property_restricts_.active_property_restricts()) {
+ property_query_terms_map_[property_restrict].insert(query_term.term);
+ }
+ } else {
+ property_query_terms_map_[""].insert(query_term.term);
+ }
+
+ // 2. If needed add term iterator to query_term_iterators_ map.
+ if (needs_term_frequency_info_) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> term_iterator,
+ index_.GetIterator(query_term.term, unnormalized_term_start,
+ query_term.raw_term.length(), kSectionIdMaskAll,
+ match_type_, needs_term_frequency_info_));
+ query_term_iterators_[query_term.term] =
+ std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(term_iterator), &document_store_, &schema_store_,
+ filter_options_, current_time_ms_);
+ }
+ }
+
+ // 3. Add the term iterator.
+ return index_.GetIterator(query_term.term, unnormalized_term_start,
+ query_term.raw_term.length(), kSectionIdMaskAll,
+ match_type, needs_term_frequency_info_);
+}
+
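+// Registers the built-in query functions. Illustrative example queries that
+// resolve to these functions:
+//   search("foo bar", createList("subject", "body"))
+//   propertyDefined("sender.name")
+//   hasProperty("price")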
+void QueryVisitor::RegisterFunctions() {
+ // std::vector<std::string> createList(std::string...);
+  Function create_list_function =
+ Function::Create(DataType::kStringList, "createList",
+ {Param(DataType::kString, Cardinality::kRequired),
+ Param(DataType::kString, Cardinality::kVariable)},
+ CreateList())
+ .ValueOrDie();
+ registered_functions_.insert(
+      {create_list_function.name(), std::move(create_list_function)});
+
+ // DocHitInfoIterator search(std::string);
+ // DocHitInfoIterator search(std::string, std::vector<std::string>);
+ auto search_eval = [this](std::vector<PendingValue>&& args) {
+ return this->SearchFunction(std::move(args));
+ };
+ Function search_function =
+ Function::Create(DataType::kDocumentIterator, "search",
+ {Param(DataType::kString),
+ Param(DataType::kStringList, Cardinality::kOptional)},
+ std::move(search_eval))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {search_function.name(), std::move(search_function)});
+
+ // DocHitInfoIterator propertyDefined(std::string);
+ auto property_defined = [this](std::vector<PendingValue>&& args) {
+ return this->PropertyDefinedFunction(std::move(args));
+ };
+ Function property_defined_function =
+ Function::Create(DataType::kDocumentIterator, "propertyDefined",
+ {Param(DataType::kString)}, std::move(property_defined))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {property_defined_function.name(), std::move(property_defined_function)});
+
+ // DocHitInfoIterator hasProperty(std::string);
+ auto has_property = [this](std::vector<PendingValue>&& args) {
+ return this->HasPropertyFunction(std::move(args));
+ };
+ Function has_property_function =
+ Function::Create(DataType::kDocumentIterator, "hasProperty",
+ {Param(DataType::kString)}, std::move(has_property))
+ .ValueOrDie();
+ registered_functions_.insert(
+ {has_property_function.name(), std::move(has_property_function)});
+}
+
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
+ std::vector<PendingValue>&& args) {
+ // The second arg (if present) is a list of sections to restrict to.
+ if (args.size() == 2) {
+ std::set<std::string> new_restricts;
+ std::vector<std::string> property_restricts =
+ std::move(args.at(1)).string_vals().ValueOrDie();
+ for (std::string& property_restrict : property_restricts) {
+ new_restricts.insert(std::move(property_restrict));
+ }
+ pending_property_restricts_.AddValidRestricts(std::move(new_restricts));
+ if (pending_property_restricts_.active_property_restricts().empty()) {
+ pending_property_restricts_.PopRestricts();
+ return PendingValue(std::make_unique<DocHitInfoIteratorNone>());
+ }
+ }
+
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const QueryTerm* query = args.at(0).string_val().ValueOrDie();
+ Lexer lexer(query->term, Lexer::Language::QUERY);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ std::unique_ptr<DocHitInfoIterator> iterator;
+ QueryResults query_result;
+ if (tree_root == nullptr) {
+ iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_.last_added_document_id());
+ } else {
+ QueryVisitor query_visitor(
+ &index_, &numeric_index_, &document_store_, &schema_store_,
+ &normalizer_, &tokenizer_, query->raw_term, filter_options_,
+ match_type_, needs_term_frequency_info_, pending_property_restricts_,
+ processing_not_, current_time_ms_);
+ tree_root->Accept(&query_visitor);
+ ICING_ASSIGN_OR_RETURN(query_result,
+ std::move(query_visitor).ConsumeResults());
+ iterator = std::move(query_result.root_iterator);
+ }
+
+ // Update members based on results of processing the query.
+ if (args.size() == 2 &&
+ pending_property_restricts_.has_active_property_restricts()) {
+ iterator = DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(iterator), &document_store_, &schema_store_,
+ pending_property_restricts_.active_property_restricts(),
+ current_time_ms_);
+ pending_property_restricts_.PopRestricts();
+ }
+ if (!processing_not_) {
+ std::move(
+ query_result.query_term_iterators.begin(),
+ query_result.query_term_iterators.end(),
+ std::inserter(query_term_iterators_, query_term_iterators_.end()));
+
+ std::move(query_result.query_terms.begin(), query_result.query_terms.end(),
+ std::inserter(property_query_terms_map_,
+ property_query_terms_map_.end()));
+ }
+ std::move(query_result.features_in_use.begin(),
+ query_result.features_in_use.end(),
+ std::inserter(features_, features_.end()));
+ return PendingValue(std::move(iterator));
+}
+
+libtextclassifier3::StatusOr<PendingValue>
+QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) {
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const QueryTerm* member = args.at(0).string_val().ValueOrDie();
+
+ std::unique_ptr<DocHitInfoIterator> all_docs_iterator =
+ std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_.last_added_document_id());
+
+ std::set<std::string> target_sections = {std::move(member->term)};
+ std::unique_ptr<DocHitInfoIterator> property_in_schema_iterator =
+ std::make_unique<DocHitInfoIteratorPropertyInSchema>(
+ std::move(all_docs_iterator), &document_store_, &schema_store_,
+ std::move(target_sections), current_time_ms_);
+
+ features_.insert(kListFilterQueryLanguageFeature);
+
+ return PendingValue(std::move(property_in_schema_iterator));
+}
+
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::HasPropertyFunction(
+ std::vector<PendingValue>&& args) {
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
+ // to call ValueOrDie.
+ const std::string& property_path = args.at(0).string_val().ValueOrDie()->term;
+
+ // Perform an exact search for the property existence metadata token.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> meta_hit_iterator,
+ index_.GetIterator(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, property_path),
+ /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY,
+ /*need_hit_term_frequency=*/false));
+
+ std::unique_ptr<DocHitInfoIterator> property_in_document_iterator =
+ std::make_unique<DocHitInfoIteratorPropertyInDocument>(
+ std::move(meta_hit_iterator));
+
+ features_.insert(kHasPropertyFunctionFeature);
+
+ return PendingValue(std::move(property_in_document_iterator));
+}
+
+libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
+ }
+ ICING_ASSIGN_OR_RETURN(int64_t int_value, pending_values_.top().long_val());
+ pending_values_.pop();
+ return int_value;
+}
+
+libtextclassifier3::StatusOr<QueryTerm> QueryVisitor::PopPendingStringValue() {
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve string value.");
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm string_value,
+ std::move(pending_values_.top()).string_val());
+ pending_values_.pop();
+ return string_value;
+}
+
+libtextclassifier3::StatusOr<QueryTerm> QueryVisitor::PopPendingTextValue() {
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve text value.");
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value,
+ std::move(pending_values_.top()).text_val());
+ pending_values_.pop();
+ return text_value;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryVisitor::PopPendingIterator() {
+ if (pending_values_.empty() || pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve iterator.");
+ }
+ if (pending_values_.top().data_type() == DataType::kDocumentIterator) {
+ std::unique_ptr<DocHitInfoIterator> iterator =
+ std::move(pending_values_.top()).iterator().ValueOrDie();
+ pending_values_.pop();
+ return iterator;
+ } else if (pending_values_.top().data_type() == DataType::kString) {
+ features_.insert(kVerbatimSearchFeature);
+ ICING_ASSIGN_OR_RETURN(QueryTerm string_value, PopPendingStringValue());
+ return CreateTermIterator(std::move(string_value));
+ } else {
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> token_itr,
+ tokenizer_.Tokenize(text_value.term));
+ std::string normalized_term;
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ // The tokenizer will produce 1+ tokens out of the text. The prefix operator
+ // only applies to the final token.
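+    // Illustrative example: the text value "foo-bar*" would typically be
+    // segmented into tokens "foo" and "bar", with the prefix flag applying
+    // only to "bar"; the per-token iterators are then AND'd together below.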
+ bool reached_final_token = !token_itr->Advance();
+    // raw_text is the portion of text_value.raw_term that hasn't yet been
+    // matched to any of the tokens that we've processed. raw_token will hold
+    // the portion of raw_text that corresponds to the token currently being
+    // processed.
+ std::string_view raw_text = text_value.raw_term;
+ std::string_view raw_token;
+ while (!reached_final_token) {
+ std::vector<Token> tokens = token_itr->GetTokens();
+ if (tokens.size() > 1) {
+ // The tokenizer iterator iterates between token groups. In practice,
+ // the tokenizer used with QueryVisitor (PlainTokenizer) will always
+ // only produce a single token per token group.
+ return absl_ports::InvalidArgumentError(
+ "Encountered unexpected token group with >1 tokens.");
+ }
+
+ reached_final_token = !token_itr->Advance();
+ const Token& token = tokens.at(0);
+ if (reached_final_token && token.text.length() == raw_text.length()) {
+        // Unescaped tokens are strictly smaller than their escaped
+        // counterparts. This means that if we're at the final token and
+        // token.text.length() equals raw_text.length(), then all of raw_text
+        // must correspond to this token.
+ raw_token = raw_text;
+ } else {
+ ICING_ASSIGN_OR_RETURN(raw_token, string_util::FindEscapedToken(
+ raw_text, token.text));
+ }
+ normalized_term = normalizer_.NormalizeTerm(token.text);
+ QueryTerm term_value{std::move(normalized_term), raw_token,
+ reached_final_token && text_value.is_prefix_val};
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
+ CreateTermIterator(std::move(term_value)));
+ iterators.push_back(std::move(iterator));
+
+      // Remove raw_token from raw_text now that we've processed it.
+ const char* escaped_token_end = raw_token.data() + raw_token.length();
+ raw_text = raw_text.substr(escaped_token_end - raw_text.data());
+ }
+
+ // Finally, create an And Iterator. If there's only a single term here, then
+ // it will just return that term iterator. Otherwise, segmented text is
+ // treated as a group of terms AND'd together.
+ return CreateAndIterator(std::move(iterators));
+ }
+}
+
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
+QueryVisitor::PopAllPendingIterators() {
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> itr,
+ PopPendingIterator());
+ iterators.push_back(std::move(itr));
+ }
+ if (pending_values_.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Unable to retrieve expected iterators.");
+ }
+ // Iterators will be in reverse order because we retrieved them from the
+ // stack. Reverse them to get back to the original ordering.
+ std::reverse(iterators.begin(), iterators.end());
+ return iterators;
+}
+
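+// Illustrative example: for the expression `price < 10`, the first child
+// yields the property "price" and the second child the integer 10, producing
+// a numeric iterator over the range [INT64_MIN, 9] for property "price".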
+libtextclassifier3::Status QueryVisitor::ProcessNumericComparator(
+ const NaryOperatorNode* node) {
+ if (node->children().size() != 2) {
+ return absl_ports::InvalidArgumentError("Expected 2 children.");
+ }
+
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. The first child is the property to restrict by.
+ node->children().at(0)->Accept(this);
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
+
+ if (text_value.is_prefix_val) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot use prefix operator '*' with a property name!");
+ }
+
+  // If there is an active property restrict and this property is not present
+  // in the active restrict set, then it's not satisfiable.
+ if (pending_property_restricts_.has_active_property_restricts() &&
+ pending_property_restricts_.active_property_restricts().find(
+ text_value.term) ==
+ pending_property_restricts_.active_property_restricts().end()) {
+ // The property restrict can't be satisfiable. Pop the placeholder that was
+ // just added and push a FALSE iterator.
+ pending_values_.pop();
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. The second child should be parseable as an integer value.
+ expecting_numeric_arg_ = true;
+ node->children().at(1)->Accept(this);
+ expecting_numeric_arg_ = false;
+ ICING_ASSIGN_OR_RETURN(int64_t int_value, PopPendingIntValue());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+
+ // 5. Create the iterator and push it onto pending_values_.
+ ICING_ASSIGN_OR_RETURN(Int64Range range,
+ GetInt64Range(node->operator_text(), int_value));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
+ numeric_index_.GetIterator(
+ text_value.term, range.low, range.high,
+ document_store_, schema_store_, current_time_ms_));
+
+ features_.insert(kNumericSearchFeature);
+ pending_values_.push(PendingValue(std::move(iterator)));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessAndOperator(
+ const NaryOperatorNode* node) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
+ PopAllPendingIterators());
+ return PendingValue(CreateAndIterator(std::move(iterators)));
+}
+
+libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessOrOperator(
+ const NaryOperatorNode* node) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
+ PopAllPendingIterators());
+ return PendingValue(CreateOrIterator(std::move(iterators)));
+}
+
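+// Illustrative example: for the negated literal `-3`, the child visit pushes
+// the text value "3"; this function pops it, prepends "-", and reparses the
+// result as the long value -3.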
+libtextclassifier3::Status QueryVisitor::ProcessNegationOperator(
+ const UnaryOperatorNode* node) {
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit child
+ node->child()->Accept(this);
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+
+ if (pending_values_.size() < 2) {
+ return absl_ports::InvalidArgumentError(
+ "Visit unary operator child didn't correctly add pending values.");
+ }
+
+ // 3. We want to preserve the original text of the integer value, append our
+ // minus and *then* parse as an int.
+ ICING_ASSIGN_OR_RETURN(QueryTerm int_text_val, PopPendingTextValue());
+ int_text_val.term = absl_ports::StrCat("-", int_text_val.term);
+ PendingValue pending_value =
+ PendingValue::CreateTextPendingValue(std::move(int_text_val));
+ ICING_RETURN_IF_ERROR(pending_value.long_val());
+
+ // We've parsed our integer value successfully. Pop our placeholder, push it
+ // on to the stack and return successfully.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+ pending_values_.push(std::move(pending_value));
+ return libtextclassifier3::Status::OK;
+}
+
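+// Illustrative example: for `NOT foo`, the child visit produces the term
+// iterator for "foo", which is then wrapped in a DocHitInfoIteratorNot over
+// all document ids up to last_added_document_id().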
+libtextclassifier3::Status QueryVisitor::ProcessNotOperator(
+ const UnaryOperatorNode* node) {
+ // TODO(b/265312785) Consider implementing query optimization when we run into
+ // nested NOTs. This would allow us to simplify a query like "NOT (-foo)" to
+ // just "foo". This would also require more complicate rewrites as we would
+ // need to do things like rewrite "NOT (-a OR b)" as "a AND -b" and
+ // "NOT (price < 5)" as "price >= 5".
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+ // Toggle whatever the current value of 'processing_not_' is before visiting
+ // the children.
+ processing_not_ = !processing_not_;
+
+ // 2. Visit child
+ node->child()->Accept(this);
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+
+ if (pending_values_.size() < 2) {
+ return absl_ports::InvalidArgumentError(
+ "Visit unary operator child didn't correctly add pending values.");
+ }
+
+ // 3. Retrieve the delegate iterator
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
+ PopPendingIterator());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+
+ pending_values_.push(PendingValue(std::make_unique<DocHitInfoIteratorNot>(
+ std::move(delegate), document_store_.last_added_document_id())));
+
+ // Untoggle whatever the current value of 'processing_not_' is now that we've
+ // finished processing this NOT.
+ processing_not_ = !processing_not_;
+ return libtextclassifier3::Status::OK;
+}
+
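+// Illustrative example: for `subject:foo`, the first child yields the
+// property "subject", which becomes an active restrict while the second child
+// ("foo") is evaluated; the resulting iterator is then section-restricted to
+// "subject".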
+libtextclassifier3::Status QueryVisitor::ProcessHasOperator(
+ const NaryOperatorNode* node) {
+ if (node->children().size() != 2) {
+ return absl_ports::InvalidArgumentError("Expected 2 children.");
+ }
+
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit the first child - the property.
+ node->children().at(0)->Accept(this);
+ if (has_pending_error()) {
+ return pending_error_;
+ }
+ ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
+ if (text_value.is_prefix_val) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot use prefix operator '*' with a property name!");
+ }
+ pending_property_restricts_.AddValidRestricts({text_value.term});
+
+  // Just added a restrict - if there are no active property restricts, then
+  // that must be because this restrict is unsatisfiable.
+ if (pending_property_restricts_.active_property_restricts().empty()) {
+ // The property restrict can't be satisfiable. Pop the placeholder that was
+ // just added and push a FALSE iterator.
+ pending_property_restricts_.PopRestricts();
+ pending_values_.pop();
+ pending_values_.push(
+ PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. Visit the second child - the argument.
+ node->children().at(1)->Accept(this);
+ if (has_pending_error()) {
+ return pending_error_;
+ }
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
+ PopPendingIterator());
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ }
+ pending_values_.pop();
+ pending_property_restricts_.PopRestricts();
+
+ std::set<std::string> property_restricts = {std::move(text_value.term)};
+ pending_values_.push(
+ PendingValue(DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(delegate), &document_store_, &schema_store_,
+ std::move(property_restricts), current_time_ms_)));
+ return libtextclassifier3::Status::OK;
+}
+
+void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
+ pending_error_ = absl_ports::UnimplementedError(
+ "Function Name node visiting not implemented yet.");
+}
+
+void QueryVisitor::VisitString(const StringNode* node) {
+ // A STRING node can only be a term. Create the iterator now.
+ auto unescaped_string_or = string_util::UnescapeStringValue(node->value());
+ if (!unescaped_string_or.ok()) {
+ pending_error_ = std::move(unescaped_string_or).status();
+ return;
+ }
+ std::string unescaped_string = std::move(unescaped_string_or).ValueOrDie();
+ QueryTerm val{std::move(unescaped_string), node->raw_value(),
+ node->is_prefix()};
+ pending_values_.push(PendingValue::CreateStringPendingValue(std::move(val)));
+}
+
+void QueryVisitor::VisitText(const TextNode* node) {
+ // TEXT nodes could either be a term (and will become DocHitInfoIteratorTerm)
+ // or a property name. As such, we just push the TEXT value into pending
+ // values and determine which it is at a later point.
+ QueryTerm val{std::move(node->value()), node->raw_value(), node->is_prefix()};
+ pending_values_.push(PendingValue::CreateTextPendingValue(std::move(val)));
+}
+
+void QueryVisitor::VisitMember(const MemberNode* node) {
+ if (node->children().empty()) {
+ pending_error_ =
+ absl_ports::InvalidArgumentError("Encountered malformed member node.");
+ return;
+ }
+
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit the children.
+ for (const std::unique_ptr<TextNode>& child : node->children()) {
+ child->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ }
+
+ // 3. Now process the results of the children and produce a single pending
+ // value representing this member.
+ PendingValue pending_value;
+ if (node->children().size() == 1) {
+    // 3a. If this member has only a single child, then the pending value
+    // produced by that child is the final value produced by this member.
+ pending_value = std::move(pending_values_.top());
+ pending_values_.pop();
+ } else {
+ // 3b. Retrieve the values of all children and concatenate them into a
+ // single value.
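+    // Illustrative example: a member node with child text nodes "sender" and
+    // "name" is concatenated into the single text value "sender.name".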
+ libtextclassifier3::StatusOr<QueryTerm> member_or;
+ std::vector<std::string> members;
+ QueryTerm text_val;
+ const char* start = nullptr;
+ const char* end = nullptr;
+ while (!pending_values_.empty() &&
+ !pending_values_.top().is_placeholder()) {
+ member_or = PopPendingTextValue();
+ if (!member_or.ok()) {
+ pending_error_ = std::move(member_or).status();
+ return;
+ }
+ text_val = std::move(member_or).ValueOrDie();
+ if (text_val.is_prefix_val) {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ "Cannot use prefix operator '*' within a property name!");
+ return;
+ }
+ if (start == nullptr) {
+ start = text_val.raw_term.data();
+ end = text_val.raw_term.data() + text_val.raw_term.length();
+ } else {
+ start = std::min(start, text_val.raw_term.data());
+        end = std::max(
+            end, text_val.raw_term.data() + text_val.raw_term.length());
+ }
+ members.push_back(std::move(text_val.term));
+ }
+ QueryTerm member;
+ member.term = absl_ports::StrJoin(members.rbegin(), members.rend(),
+ property_util::kPropertyPathSeparator);
+ member.raw_term = std::string_view(start, end - start);
+ member.is_prefix_val = false;
+ pending_value = PendingValue::CreateTextPendingValue(std::move(member));
+ }
+
+  // 4. If pending_values_ is empty, then our placeholder somehow disappeared.
+ if (pending_values_.empty()) {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ "Error processing arguments for member node.");
+ return;
+ }
+ pending_values_.pop();
+
+ pending_values_.push(std::move(pending_value));
+}
+
+void QueryVisitor::VisitFunction(const FunctionNode* node) {
+ // 1. Get the associated function.
+ auto itr = registered_functions_.find(node->function_name()->value());
+ if (itr == registered_functions_.end()) {
+ pending_error_ = absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Function ", node->function_name()->value(), " is not supported."));
+ return;
+ }
+
+ // 2. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 3. Visit the children.
+ for (const std::unique_ptr<Node>& arg : node->args()) {
+ arg->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ }
+
+ // 4. Collect the arguments and evaluate the function.
+ std::vector<PendingValue> args;
+ while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
+ args.push_back(std::move(pending_values_.top()));
+ pending_values_.pop();
+ }
+ std::reverse(args.begin(), args.end());
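+  // Args were popped off the stack in reverse; the reverse above restores
+  // source order, so e.g. createList("a", "b") receives {"a", "b"}.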
+ const Function& function = itr->second;
+ auto eval_result = function.Eval(std::move(args));
+ if (!eval_result.ok()) {
+ pending_error_ = std::move(eval_result).status();
+ return;
+ }
+
+ // 5. Pop placeholder in pending_values and add the result of our function.
+ pending_values_.pop();
+ pending_values_.push(std::move(eval_result).ValueOrDie());
+
+ // Support for custom functions was added in list filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+}
+
+// TODO(b/265312785) Clarify handling of the interaction between HAS and NOT.
+// Currently, `prop1:(NOT foo bar)` will not match any documents. Likewise,
+// `search("NOT foo bar", createList("prop1"))` will not match any documents.
+//
+// We should either confirm that this is the desired behavior or consider
+// rewriting these queries so that they're interpreted as
+// `NOT prop1:foo AND prop1:bar` and
+// `NOT search("foo", createList("prop1"))
+// AND search("bar", createList("prop1"))`
+void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
+ bool is_minus = node->operator_text() == "MINUS";
+ if (node->operator_text() != "NOT" && !is_minus) {
+ pending_error_ = absl_ports::UnimplementedError(
+ absl_ports::StrCat("Visiting for unary operator ",
+ node->operator_text(), " not implemented yet."));
+ return;
+ }
+
+ libtextclassifier3::Status status;
+ if (expecting_numeric_arg_ && is_minus) {
+ // If the operator is a MINUS ('-') and we're at the child of a numeric
+ // comparator, then this must be a negation ('-3')
+ status = ProcessNegationOperator(node);
+ } else {
+ status = ProcessNotOperator(node);
+ }
+
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+
+ if (!is_minus ||
+ pending_property_restricts_.has_active_property_restricts() ||
+ processing_not_) {
+ // 'NOT' operator was added in list filters.
+    // Likewise, mixing property restricts and NOTs was made valid in list
+ // filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
+}
+
+void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
+ if (!IsSupportedNaryOperator(node->operator_text())) {
+ pending_error_ = absl_ports::UnimplementedError(
+ "No support for any non-numeric operators.");
+ return;
+ }
+
+ if (pending_property_restricts_.has_active_property_restricts() ||
+ processing_not_) {
+    // Mixing property restricts and NOT with compound statements was
+ // added in list filters.
+ features_.insert(kListFilterQueryLanguageFeature);
+ }
+
+ if (node->operator_text() == ":") {
+ libtextclassifier3::Status status = ProcessHasOperator(node);
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+ return;
+ } else if (IsNumericComparator(node->operator_text())) {
+ libtextclassifier3::Status status = ProcessNumericComparator(node);
+ if (!status.ok()) {
+ pending_error_ = std::move(status);
+ }
+ return;
+ }
+
+ // 1. Put in a placeholder PendingValue
+ pending_values_.push(PendingValue());
+
+ // 2. Visit the children.
+ for (int i = 0; i < node->children().size(); ++i) {
+ node->children().at(i)->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ }
+
+ // 3. Retrieve the pending value for this node.
+ libtextclassifier3::StatusOr<PendingValue> pending_value_or;
+ if (node->operator_text() == "AND") {
+ pending_value_or = ProcessAndOperator(node);
+ } else if (node->operator_text() == "OR") {
+ pending_value_or = ProcessOrOperator(node);
+ }
+ if (!pending_value_or.ok()) {
+ pending_error_ = std::move(pending_value_or).status();
+ return;
+ }
+ PendingValue pending_value = std::move(pending_value_or).ValueOrDie();
+
+ // 4. Check for the placeholder.
+ if (!pending_values_.top().is_placeholder()) {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ "Error processing arguments for node.");
+ return;
+ }
+ pending_values_.pop();
+
+ pending_values_.push(std::move(pending_value));
+}
+
+libtextclassifier3::StatusOr<QueryResults> QueryVisitor::ConsumeResults() && {
+ if (has_pending_error()) {
+ return std::move(pending_error_);
+ }
+ if (pending_values_.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "Visitor does not contain a single root iterator.");
+ }
+ auto iterator_or = PopPendingIterator();
+ if (!iterator_or.ok()) {
+ return std::move(iterator_or).status();
+ }
+
+ QueryResults results;
+ results.root_iterator = std::move(iterator_or).ValueOrDie();
+ results.query_term_iterators = std::move(query_term_iterators_);
+ results.query_terms = std::move(property_query_terms_map_);
+ results.features_in_use = std::move(features_);
+ return results;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h
new file mode 100644
index 0000000..d090b3c
--- /dev/null
+++ b/icing/query/advanced_query_parser/query-visitor.h
@@ -0,0 +1,327 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_QUERY_VISITOR_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER_QUERY_VISITOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <stack>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/function.h"
+#include "icing/query/advanced_query_parser/pending-value.h"
+#include "icing/query/query-features.h"
+#include "icing/query/query-results.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+// The Visitor used to create the DocHitInfoIterator tree from the AST output by
+// the parser.
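+//
+// Example usage (a sketch mirroring query-visitor_test.cc; assumes root_node
+// was produced by Parser::ConsumeQuery and that all dependencies outlive the
+// visitor):
+//
+//   QueryVisitor visitor(&index, &numeric_index, &document_store,
+//                        &schema_store, &normalizer, &tokenizer, query,
+//                        DocHitInfoIteratorFilter::Options(),
+//                        TermMatchType::PREFIX,
+//                        /*needs_term_frequency_info=*/true, current_time_ms);
+//   root_node->Accept(&visitor);
+//   ICING_ASSIGN_OR_RETURN(QueryResults results,
+//                          std::move(visitor).ConsumeResults());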
+class QueryVisitor : public AbstractSyntaxTreeVisitor {
+ public:
+ explicit QueryVisitor(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store,
+ const Normalizer* normalizer,
+ const Tokenizer* tokenizer,
+ std::string_view raw_query_text,
+ DocHitInfoIteratorFilter::Options filter_options,
+ TermMatchType::Code match_type,
+ bool needs_term_frequency_info, int64_t current_time_ms)
+ : QueryVisitor(index, numeric_index, document_store, schema_store,
+ normalizer, tokenizer, raw_query_text, filter_options,
+ match_type, needs_term_frequency_info,
+ PendingPropertyRestricts(),
+ /*processing_not=*/false, current_time_ms) {}
+
+ void VisitFunctionName(const FunctionNameNode* node) override;
+ void VisitString(const StringNode* node) override;
+ void VisitText(const TextNode* node) override;
+ void VisitMember(const MemberNode* node) override;
+ void VisitFunction(const FunctionNode* node) override;
+ void VisitUnaryOperator(const UnaryOperatorNode* node) override;
+ void VisitNaryOperator(const NaryOperatorNode* node) override;
+
+ // RETURNS:
+ // - the QueryResults reflecting the AST that was visited
+ // - INVALID_ARGUMENT if the AST does not conform to supported expressions
+ // - NOT_FOUND if the AST refers to a property that does not exist
+ libtextclassifier3::StatusOr<QueryResults> ConsumeResults() &&;
+
+ private:
+ // An internal class to help manage property restricts being applied at
+ // different levels.
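+ // For example, while processing `prop1:(prop2:foo)`, {"prop1"} is pushed
+ // when the outer HAS operator is entered and popped when it is exited, so
+ // the inner restrict on "prop2" is checked against the active set.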
+ class PendingPropertyRestricts {
+ public:
+ // Adds another set of property restricts. Elements of new_restricts that
+ // are not present in active_property_restricts() are dropped.
+ void AddValidRestricts(std::set<std::string> new_restricts);
+
+ // Pops the most recently added set of property restricts.
+ void PopRestricts() {
+ if (has_active_property_restricts()) {
+ pending_property_restricts_.pop_back();
+ }
+ }
+
+ bool has_active_property_restricts() const {
+ return !pending_property_restricts_.empty();
+ }
+
+ // The set of all property restrictions that are currently being applied.
+ const std::set<std::string>& active_property_restricts() const {
+ return pending_property_restricts_.back();
+ }
+
+ private:
+ std::vector<std::set<std::string>> pending_property_restricts_;
+ };
+
+ explicit QueryVisitor(
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const Normalizer* normalizer, const Tokenizer* tokenizer,
+ std::string_view raw_query_text,
+ DocHitInfoIteratorFilter::Options filter_options,
+ TermMatchType::Code match_type, bool needs_term_frequency_info,
+ PendingPropertyRestricts pending_property_restricts, bool processing_not,
+ int64_t current_time_ms)
+ : index_(*index),
+ numeric_index_(*numeric_index),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ normalizer_(*normalizer),
+ tokenizer_(*tokenizer),
+ raw_query_text_(raw_query_text),
+ filter_options_(std::move(filter_options)),
+ match_type_(match_type),
+ needs_term_frequency_info_(needs_term_frequency_info),
+ pending_property_restricts_(std::move(pending_property_restricts)),
+ processing_not_(processing_not),
+ expecting_numeric_arg_(false),
+ current_time_ms_(current_time_ms) {
+ RegisterFunctions();
+ }
+
+ bool has_pending_error() const { return !pending_error_.ok(); }
+
+ // Creates a DocHitInfoIterator reflecting the provided term and whether the
+ // prefix operator has been applied to this term. Also populates
+ // property_query_terms_map_ and query_term_iterators_ as appropriate.
+ // Returns:
+ // - On success, a DocHitInfoIterator for the provided term
+ // - INVALID_ARGUMENT if unable to create an iterator for the term.
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ CreateTermIterator(const QueryTerm& term);
+
+ // Processes the PendingValue at the top of pending_values_, parses it into
+ // an int64_t and pops the top.
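+ // For example, if the top of pending_values_ holds the text "25", this pops
+ // it and returns the int64_t 25.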
+ // Returns:
+ // - On success, the int value stored in the text at the top
+ // - INVALID_ARGUMENT if pending_values_ is empty, doesn't hold a text or
+ // can't be parsed as an int.
+ libtextclassifier3::StatusOr<int64_t> PopPendingIntValue();
+
+ // Processes the PendingValue at the top of pending_values_ and pops the top.
+ // Returns:
+ // - On success, the string value stored in the text at the top and a bool
+ // indicating whether or not the string value has a prefix operator.
+ // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a string.
+ libtextclassifier3::StatusOr<QueryTerm> PopPendingStringValue();
+
+ // Processes the PendingValue at the top of pending_values_ and pops the top.
+ // Returns:
+ // - On success, the string value stored in the text at the top and a bool
+ // indicating whether or not the string value has a prefix operator.
+ // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a text.
+ libtextclassifier3::StatusOr<QueryTerm> PopPendingTextValue();
+
+ // Processes the PendingValue at the top of pending_values_ and pops the top.
+ // Returns:
+ // - On success, a DocHitInfoIterator representing the term at the top
+ // - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an
+ // iterator for the term.
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ PopPendingIterator();
+
+ // Processes all PendingValues at the top of pending_values_ until the first
+ // placeholder is encountered.
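+ // For example, when processing the children of an AND node, this pops each
+ // child's iterator until it reaches the node's placeholder.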
+ // Returns:
+ // - On success, a vector containing all DocHitInfoIterators representing
+ // the values at the top of pending_values_
+ // - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an
+ // iterator for any of the terms at the top of pending_values_
+ libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
+ PopAllPendingIterators();
+
+ // Processes the unary operator node as a NOT operator. A NOT can have an
+ // operator type of "NOT" or "MINUS".
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT if any errors are encountered while processing
+ // node->child
+ libtextclassifier3::Status ProcessNotOperator(const UnaryOperatorNode* node);
+
+ // Processes the unary operator node as a negation operator. A negation
+ // operator should have an operator of type "MINUS" and its child must
+ // resolve to a numeric value.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INVALID_ARGUMENT if the node->child can't be resolved to a numeric
+ // value.
+ libtextclassifier3::Status ProcessNegationOperator(
+ const UnaryOperatorNode* node);
+
+ // Processes the NumericComparator represented by node. This must be called
+ // *after* this node's children have been visited. The PendingValues added by
+ // this node's children will be consumed by this function and the
+ // PendingValue for this node will be pushed onto pending_values_.
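+ // For example, for `price < 2` the children leave the text values "price"
+ // and "2" on pending_values_; this function consumes both and pushes an
+ // iterator over the numeric index for values of "price" less than 2.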
+ // Returns:
+ // - On success, OK
+ // - INVALID_ARGUMENT if unable to retrieve string value or int value
+ // - NOT_FOUND if there is no entry in the numeric index for the property
+ libtextclassifier3::Status ProcessNumericComparator(
+ const NaryOperatorNode* node);
+
+ // Processes the AND operator represented by the node. This must be
+ // called *after* this node's children have been visited. The PendingValues
+ // added by this node's children will be consumed by this function and the
+ // PendingValue for this node will be returned.
+ // Returns:
+ // - On success, the PendingValue representing this node and its children.
+ // - INVALID_ARGUMENT if unable to retrieve iterators for any of this node's
+ // children.
+ libtextclassifier3::StatusOr<PendingValue> ProcessAndOperator(
+ const NaryOperatorNode* node);
+
+ // Processes the OR operator represented by the node. This must be called
+ // *after* this node's children have been visited. The PendingValues added by
+ // this node's children will be consumed by this function and the PendingValue
+ // for this node will be returned.
+ // Returns:
+ // - On success, the PendingValue representing this node and its children.
+ // - INVALID_ARGUMENT if unable to retrieve iterators for any of this node's
+ // children.
+ libtextclassifier3::StatusOr<PendingValue> ProcessOrOperator(
+ const NaryOperatorNode* node);
+
+ // Populates registered_functions_ with the currently supported set of
+ // functions.
+ void RegisterFunctions();
+
+ // Implementation of `search` custom function in the query language.
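+ // For example, `search("foo bar", createList("subject"))` evaluates the
+ // inner query "foo bar" restricted to the "subject" property.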
+ // Returns:
+ // - a PendingValue holding the DocHitInfoIterator reflecting the query
+ // provided to SearchFunction
+ // - any errors returned by Lexer::ExtractTokens, Parser::ConsumeQuery or
+ // QueryVisitor::ConsumeResults.
+ libtextclassifier3::StatusOr<PendingValue> SearchFunction(
+ std::vector<PendingValue>&& args);
+
+ // Implementation of the propertyDefined(property_path) custom function.
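+ // For example, `propertyDefined("subscription.price")` matches all documents
+ // whose schema type defines a "subscription.price" property.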
+ // Returns:
+ // - a PendingValue holding a DocHitInfoIterator that returns hits for all
+ // documents whose schema types have defined the property specified by
+ // property_path.
+ // - any errors returned by Lexer::ExtractTokens
+ libtextclassifier3::StatusOr<PendingValue> PropertyDefinedFunction(
+ std::vector<PendingValue>&& args);
+
+ // Implementation of the hasProperty(property_path) custom function.
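+ // For example, `hasProperty("subscription.price")` matches only documents
+ // that actually contain a value for the "subscription.price" property.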
+ // Returns:
+ // - a PendingValue holding a DocHitInfoIterator that returns hits for all
+ // documents that have the property specified by property_path.
+ // - any errors returned by Lexer::ExtractTokens
+ libtextclassifier3::StatusOr<PendingValue> HasPropertyFunction(
+ std::vector<PendingValue>&& args);
+
+ // Handles a NaryOperatorNode where the operator is HAS (':') and pushes an
+ // iterator with the proper section filter applied. If the current property
+ // restriction represented by pending_property_restricts and the first child
+ // of this node is unsatisfiable (ex. `prop1:(prop2:foo)`), then a NONE
+ // iterator is returned immediately and the subtree represented by the second
+ // child is not traversed.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT if the node does not have exactly two children or the
+ // two children cannot be resolved to a MEMBER or an iterator respectively.
+ libtextclassifier3::Status ProcessHasOperator(const NaryOperatorNode* node);
+
+ // Returns the correct match type to apply based on both the match type and
+ // whether the prefix operator is currently present.
+ TermMatchType::Code GetTermMatchType(bool is_prefix) const {
+ return (is_prefix) ? TermMatchType::PREFIX : match_type_;
+ }
+
+ std::stack<PendingValue> pending_values_;
+ libtextclassifier3::Status pending_error_;
+
+ // A map from function name to Function instance.
+ std::unordered_map<std::string, Function> registered_functions_;
+
+ SectionRestrictQueryTermsMap property_query_terms_map_;
+
+ QueryTermIteratorsMap query_term_iterators_;
+ // Set of features invoked in the query.
+ std::unordered_set<Feature> features_;
+
+ Index& index_; // Does not own!
+ const NumericIndex<int64_t>& numeric_index_; // Does not own!
+ const DocumentStore& document_store_; // Does not own!
+ const SchemaStore& schema_store_; // Does not own!
+ const Normalizer& normalizer_; // Does not own!
+ const Tokenizer& tokenizer_; // Does not own!
+
+ std::string_view raw_query_text_;
+ DocHitInfoIteratorFilter::Options filter_options_;
+ TermMatchType::Code match_type_;
+ // Whether or not term_frequency information is needed. This affects:
+ // - how DocHitInfoIteratorTerms are constructed
+ // - whether the QueryTermIteratorsMap is populated in the QueryResults.
+ bool needs_term_frequency_info_;
+
+ // The stack of property restricts currently being processed by the visitor.
+ PendingPropertyRestricts pending_property_restricts_;
+ bool processing_not_;
+
+ // Whether we are in the midst of processing a subtree that is expected to
+ // resolve to a numeric argument.
+ bool expecting_numeric_arg_;
+
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER_QUERY_VISITOR_H_
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
new file mode 100644
index 0000000..9455baa
--- /dev/null
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -0,0 +1,4112 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/query-visitor.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/property-existence-indexing-handler.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/query/query-features.h"
+#include "icing/query/query-results.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/status-macros.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAre;
+
+constexpr DocumentId kDocumentId0 = 0;
+constexpr DocumentId kDocumentId1 = 1;
+constexpr DocumentId kDocumentId2 = 2;
+
+constexpr SectionId kSectionId0 = 0;
+constexpr SectionId kSectionId1 = 1;
+constexpr SectionId kSectionId2 = 2;
+
+template <typename T, typename U>
+std::vector<T> ExtractKeys(const std::unordered_map<T, U>& map) {
+ std::vector<T> keys;
+ keys.reserve(map.size());
+ for (const auto& [key, value] : map) {
+ keys.push_back(key);
+ }
+ return keys;
+}
+
+enum class QueryType {
+ kPlain,
+ kSearch,
+};
+
+class QueryVisitorTest : public ::testing::TestWithParam<QueryType> {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ index_dir_ = test_dir_ + "/index";
+ numeric_index_dir_ = test_dir_ + "/numeric_index";
+ store_dir_ = test_dir_ + "/store";
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ jni_cache_ = GetTestJniCache();
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, store_dir_, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+
+ Index::Options options(index_dir_.c_str(),
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_US, jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(segmenter_options));
+
+ ICING_ASSERT_OK_AND_ASSIGN(tokenizer_,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter_.get()));
+ }
+
+ libtextclassifier3::StatusOr<std::unique_ptr<Node>> ParseQueryHelper(
+ std::string_view query) {
+ Lexer lexer(query, Lexer::Language::QUERY);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ return parser.ConsumeQuery();
+ }
+
+ std::string EscapeString(std::string_view str) {
+ std::string result;
+ result.reserve(str.size());
+ for (char c : str) {
+ if (c == '\\' || c == '"') {
+ result.push_back('\\');
+ }
+ result.push_back(c);
+ }
+ return result;
+ }
+
+ std::string CreateQuery(std::string query,
+ std::string property_restrict = "") {
+ switch (GetParam()) {
+ case QueryType::kPlain:
+ if (property_restrict.empty()) {
+ // CreateQuery("foo bar") returns `foo bar`
+ return query;
+ }
+ // CreateQuery("foo", "subject") returns `subject:foo`
+ return absl_ports::StrCat(property_restrict, ":", query);
+ case QueryType::kSearch:
+ query = EscapeString(query);
+ property_restrict = EscapeString(property_restrict);
+ if (property_restrict.empty()) {
+ // CreateQuery("foo bar") returns `search("foo bar")`
+ return absl_ports::StrCat("search(\"", query, "\")");
+ }
+ // CreateQuery("foo", "subject") returns
+ // `search("foo bar", createList("subject"))`
+ return absl_ports::StrCat("search(\"", query, "\", createList(\"",
+ property_restrict, "\"))");
+ }
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::string test_dir_;
+ std::string index_dir_;
+ std::string numeric_index_dir_;
+ std::string schema_store_dir_;
+ std::string store_dir_;
+ Clock clock_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<Index> index_;
+ std::unique_ptr<DummyNumericIndex<int64_t>> numeric_index_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<Tokenizer> tokenizer_;
+ std::unique_ptr<const JniCache> jni_cache_;
+};
+
+TEST_P(QueryVisitorTest, SimpleLessThan) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price < 2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "2" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SimpleLessThanEq) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price <= 1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "1" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SimpleEqual) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price == 2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "2" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest, SimpleGreaterThanEq) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price >= 1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "1" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, SimpleGreaterThan) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price > 1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and "1" isn't a "term" - its a numeric
+ // value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest, IntMinLessThanEqual) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
+ // INT_MAX and INT_MIN + 1 respectively.
+ int64_t int_min = std::numeric_limits<int64_t>::min();
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(int_min));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::max()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(int_min + 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price <= " + std::to_string(int_min));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and int_min isn't a "term" - its a
+ // numeric value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, IntMaxGreaterThanEqual) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
+ // INT_MAX and INT_MAX - 1 respectively.
+ int64_t int_max = std::numeric_limits<int64_t>::max();
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::min()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(int_max));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(int_max - 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price >= " + std::to_string(int_max));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "price" is a property restrict here and int_max isn't a "term" - its a
+ // numeric value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, NestedPropertyLessThan) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor =
+ numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor =
+ numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("subscription.price < 2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ // "subscription.price" is a property restrict here and int_max isn't a "term"
+ // - its a numeric value. So QueryTermIterators should be empty.
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, IntParsingError) {
+ std::string query = CreateQuery("subscription.price < fruit");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, NotEqualsUnsupported) {
+ std::string query = CreateQuery("subscription.price != 3");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+TEST_P(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor =
+ numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor =
+ numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ // Create an invalid AST for the query '3 < subscription.price 25' where '<'
+ // has three operands.
+ std::string_view query = "3 < subscription.price 25";
+ auto property_node =
+ std::make_unique<TextNode>("subscription", query.substr(4, 12));
+ auto subproperty_node =
+ std::make_unique<TextNode>("price", query.substr(17, 5));
+ std::vector<std::unique_ptr<TextNode>> member_args;
+ member_args.push_back(std::move(property_node));
+ member_args.push_back(std::move(subproperty_node));
+ auto member_node = std::make_unique<MemberNode>(std::move(member_args),
+ /*function=*/nullptr);
+
+ auto value_node = std::make_unique<TextNode>("3", query.substr(0, 1));
+ auto extra_value_node = std::make_unique<TextNode>("25", query.substr(23, 2));
+ std::vector<std::unique_ptr<Node>> args;
+ args.push_back(std::move(value_node));
+ args.push_back(std::move(member_node));
+ args.push_back(std::move(extra_value_node));
+ auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
+ // Create an invalid AST for the query 'subscription.price <' where '<'
+ // has a single operand.
+ std::string_view query = "subscription.price <";
+ auto property_node =
+ std::make_unique<TextNode>("subscription", query.substr(0, 12));
+ auto subproperty_node =
+ std::make_unique<TextNode>("price", query.substr(13, 5));
+ std::vector<std::unique_ptr<TextNode>> member_args;
+ member_args.push_back(std::move(property_node));
+ member_args.push_back(std::move(subproperty_node));
+ auto member_node = std::make_unique<MemberNode>(std::move(member_args),
+ /*function=*/nullptr);
+
+ std::vector<std::unique_ptr<Node>> args;
+ args.push_back(std::move(member_node));
+ auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values 0, 1 and 2
+ // respectively.
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("subscription.price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(0));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor =
+ numeric_index_->Edit("subscription.price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor =
+ numeric_index_->Edit("subscription.price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("time < 25");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryVisitorTest, NeverVisitedReturnsInvalid) {
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), "",
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, IntMinLessThanInvalid) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
+ // INT_MAX and INT_MIN + 1 respectively.
+ int64_t int_min = std::numeric_limits<int64_t>::min();
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(int_min));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::max()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(int_min + 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price <" + std::to_string(int_min));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, IntMaxGreaterThanInvalid) {
+ // Set up the numeric index with docs 0, 1 and 2 holding the values INT_MIN,
+ // INT_MAX and INT_MAX - 1 respectively.
+ int64_t int_max = std::numeric_limits<int64_t>::max();
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(std::numeric_limits<int64_t>::min()));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId1);
+ ICING_ASSERT_OK(editor->BufferKey(int_max));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId2);
+ ICING_ASSERT_OK(editor->BufferKey(int_max - 1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ std::string query = CreateQuery("price >" + std::to_string(int_max));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
+ // "price" is a STRING token, which cannot be a property name.
+ std::string query = CreateQuery(R"("price" > 7)");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, NumericComparatorDoesntAffectLaterTerms) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Index three documents:
+ // - Doc0: ["-2", "-1", "1", "2"] and [-2, -1, 1, 2]
+ // - Doc1: [-1]
+ // - Doc2: ["2"] and [-1]
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit("price", kDocumentId0, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(-2));
+ ICING_ASSERT_OK(editor->BufferKey(-1));
+ ICING_ASSERT_OK(editor->BufferKey(1));
+ ICING_ASSERT_OK(editor->BufferKey(2));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ Index::Editor term_editor = index_->Edit(
+ kDocumentId0, kSectionId1, TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(term_editor.BufferTerm("-2"));
+ ICING_ASSERT_OK(term_editor.BufferTerm("-1"));
+ ICING_ASSERT_OK(term_editor.BufferTerm("1"));
+ ICING_ASSERT_OK(term_editor.BufferTerm("2"));
+ ICING_ASSERT_OK(term_editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = numeric_index_->Edit("price", kDocumentId1, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(-1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = numeric_index_->Edit("price", kDocumentId2, kSectionId0);
+ ICING_ASSERT_OK(editor->BufferKey(-1));
+ ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
+ term_editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(term_editor.BufferTerm("2"));
+ ICING_ASSERT_OK(term_editor.IndexAllBufferedTerms());
+
+ // Translating the MINUS chars that are interpreted as NOTs, this query is
+ // equivalent to `price == -1 AND NOT 2`.
+ // All three documents match `price == -1`, but docs 0 and 2 are excluded by
+ // the `NOT 2` clause. doc0 has both a text and a number entry for `-2`,
+ // neither of which should match.
+ std::string query = CreateQuery("price == -1 -2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kNumericSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ std::vector<TermMatchInfo> match_infos;
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{kSectionId1, 1}};
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ match_infos.clear();
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ EXPECT_THAT(query_results.root_iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_P(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/false, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ std::vector<TermMatchInfo> match_infos;
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{kSectionId1, 0}};
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(query_results.root_iterator->Advance(), IsOk());
+ match_infos.clear();
+ query_results.root_iterator->PopulateMatchedTermsStats(&match_infos);
+ EXPECT_THAT(match_infos, ElementsAre(EqualsTermMatchInfo(
+ "foo", expected_section_ids_tf_map)));
+
+ EXPECT_THAT(query_results.root_iterator->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_P(QueryVisitorTest, SingleTermPrefix) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // An EXACT query for 'fo' won't match anything.
+ std::string query = CreateQuery("fo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+
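+ // With the `*` prefix operator, `fo` matches the docs containing "foo".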
+ query = CreateQuery("fo*");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PrefixOperatorAfterPropertyReturnsInvalid) {
+ std::string query = "price* < 2";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PrefixOperatorAfterNumericValueReturnsInvalid) {
+ std::string query = "price < 2*";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PrefixOperatorAfterPropertyRestrictReturnsInvalid) {
+ std::string query = "subject*:foo";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, SegmentationWithPrefix) {
+ // Set up the index with docs 0, 1 and 2 holding the values ["foo", "ba"],
+ // ["foo", "ba"] and ["bar", "fo"] respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("ba"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("ba"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("fo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // An EXACT query for `ba?fo` will be lexed into a single TEXT token.
+ // The visitor will tokenize it into `ba` and `fo` (`?` is dropped because it
+ // is punctuation). Each document will match one and only one of these exact
+ // tokens. Therefore, nothing will match this query.
+ std::string query = CreateQuery("ba?fo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+
+ // An EXACT query for `ba?fo*` will be lexed into a TEXT token and a TIMES
+ // token.
+ // The visitor will tokenize the TEXT into `ba` and `fo` (`?` is dropped
+ // because it is punctuation). The prefix operator should only apply to the
+ // final token `fo`. This will match docs 0 and 1, which contain `ba` (an
+ // exact match) and `foo` (a prefix match for `fo`). Doc 2 will not match
+ // because `ba` does not exactly match either `bar` or `fo`.
+ query = CreateQuery("ba?fo*");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("ba", "fo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SingleVerbatimTerm) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo:bar(baz)",
+ // "foo:bar(baz)" and "bar:baz(foo)" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(baz)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(baz)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar:baz(foo)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("\"foo:bar(baz)\"");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo:bar(baz)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo:bar(baz)"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SingleVerbatimTermPrefix) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo:bar(baz)",
+ // "foo:bar(abc)" and "bar:baz(foo)" respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(baz)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo:bar(abc)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar:baz(foo)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Query for `"foo:bar("*`. This should match docs 0 and 1.
+ std::string query = CreateQuery("\"foo:bar(\"*");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_EXACT,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo:bar("));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo:bar("));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+// There are three primary cases to worry about for escaping:
+//
+// NOTE: The following comments use ` chars to denote the beginning and end of
+// the verbatim term rather than " chars to avoid confusion. Additionally, the
+ // raw chars themselves are shown. So `foobar\\` in actual C++ would be written
+// as std::string verbatim_term = "foobar\\\\";
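+ //
+ // (For reference, a plain C++ fact rather than anything icing-specific: the
+ // tests below spell raw chars with raw string literals, so the raw chars
+ // `foobar\"` appear in source as R"(foobar\")", which is equivalent to the
+ // escaped literal "foobar\\\"".)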
+//
+// 1. How does a user represent a quote char (") without terminating the
+// verbatim term?
+// Example: verbatim_term = `foobar"`
+// Answer: quote char must be escaped. verbatim_query = `foobar\"`
+TEST_P(QueryVisitorTest, VerbatimTermEscapingQuote) {
+ // Set up the index with docs 0, 1 and 2 holding the values `foobary`,
+ // `foobar\` and `foobar"` respectively (raw chars shown).
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_EXACT, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobary)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // From the comment above, verbatim_term = `foobar"` and verbatim_query =
+ // `foobar\"`.
+ std::string query = CreateQuery(R"(("foobar\""))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar")"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar")"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+ // 2. How does a user represent an escape char (\) that immediately precedes
+ //    the end of the verbatim term?
+// Example: verbatim_term = `foobar\`
+// Answer: escape chars can be escaped. verbatim_query = `foobar\\`
+TEST_P(QueryVisitorTest, VerbatimTermEscapingEscape) {
+ // Set up the index with docs 0, 1 and 2 holding the values `foobary`,
+ // `foobar\` and `foobar"` respectively (raw chars shown).
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_EXACT, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobary)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ // From the comment above, verbatim_term = `foobar\`.
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Issue a query for the verbatim token `foobar\`.
+ std::string query = CreateQuery(R"(("foobar\\"))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar\)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar\)"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+ // 3. How do we handle other escaped chars?
+ //    Example: verbatim_query = `foobar\y`.
+ //    Answer: all chars preceded by an escape char are unescaped blindly (as
+ //            in, consume the escape char and keep the following char, just as
+ //            for the quote char). So the above query would match the
+ //            verbatim_term `foobary`.
+TEST_P(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
+ // Set up the index with docs 0, 1 and 2 holding the values `foobary`,
+ // `foobar\` and `foobar\y` respectively (raw chars shown).
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_EXACT, /*namespace_id=*/0);
+ // From the comment above, verbatim_term = `foobary`.
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobary)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\y)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Issue a query for the verbatim token `foobary`.
+ std::string query = CreateQuery(R"(("foobar\y"))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobary)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobary)"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+
+ // Issue a query for the verbatim token `foobar\y`.
+ query = CreateQuery(R"(("foobar\\y"))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar\y)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar\y)"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+ // This isn't a special case, but it is useful for demonstration. There are
+ // a number of escape sequences in C++, including the newline character '\n'.
+ // It is worth emphasizing that the newline character, like the other escape
+ // sequences in C++, is a single, distinct ASCII value. For a query
+ // `foobar` + '\n', the parser will see the character sequence [`f`, `o`, `o`,
+ // `b`, `a`, `r`, '\n'] - it *won't* ever see `\` and `n`.
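+ //
+ // For reference: in the test below, "foobar\n" is seven chars ending in a
+ // single newline char, while the raw string R"(foobar\n)" is eight chars
+ // ending in `\` and `n`.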
+TEST_P(QueryVisitorTest, VerbatimTermNewLine) {
+ // Set up the index with docs 0, 1 and 2 holding the values `foobar` + '\n',
+ // `foobar\` and `foobar\n` (raw chars) respectively.
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_EXACT, /*namespace_id=*/0);
+ // From the comment above, verbatim_term = `foobar` + '\n'.
+ ICING_ASSERT_OK(editor.BufferTerm("foobar\n"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ // verbatim_term = `foobar\n`. This is distinct from the term added above.
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foobar\n)"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Issue a query for the verbatim token `foobar` + '\n'.
+ std::string query = CreateQuery("\"foobar\n\"");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foobar\n"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foobar\n"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+
+ // Now, issue a query for the verbatim token `foobar\n`.
+ query = CreateQuery(R"(("foobar\\n"))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foobar\n)"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foobar\n)"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest, VerbatimTermEscapingComplex) {
+ // Set up the index with docs 0, 1 and 2 holding the values `foo\"bar\nbaz"`,
+ // `foo\\\"bar\\nbaz\"` and `foo\\"bar\\nbaz"` respectively (raw chars shown).
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_EXACT, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foo\"bar\nbaz")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ // Add the verbatim_term from doc 0, but with all of the escapes left in.
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foo\\\"bar\\nbaz\")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_EXACT,
+ /*namespace_id=*/0);
+ // Add the verbatim_term from doc 0, but with the escapes for '\' chars left
+ // in.
+ ICING_ASSERT_OK(editor.BufferTerm(R"(foo\\"bar\\nbaz")"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Issue a query for the verbatim token `foo\"bar\nbaz"`.
+ std::string query = CreateQuery(R"(("foo\\\"bar\\nbaz\""))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kVerbatimSearchFeature));
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre(R"(foo\"bar\nbaz")"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre(R"(foo\"bar\nbaz")"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SingleMinusTerm) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("-foo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest, SingleNotTerm) {
+ // Set up the index with docs 0, 1 and 2 holding the values "foo", "foo" and
+ // "bar" respectively.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("NOT foo");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.query_terms, IsEmpty());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest, NestedNotTerms) {
+ // Set up the index with docs 0, 1 and 2 holding the values
+ // ["foo", "bar", "baz"], ["foo", "baz"] and ["bar", "baz"] respectively.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // By De Morgan's laws, the double negative could be rewritten as
+ // `(foo AND NOT bar) baz`.
+ std::string query = CreateQuery("NOT (-foo OR bar) baz");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
+ // Set up the index with docs 0, 1 and 2 holding the values
+ // ["foo", "bar", "baz"], ["foo", "baz"] and ["bar", "baz"] respectively.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Simplifying:
+ // NOT (-(NOT (foo -bar) baz) -bat) NOT bass
+ // NOT (-((-foo OR bar) baz) -bat) NOT bass
+ // NOT (((foo -bar) OR -baz) -bat) NOT bass
+ // (((-foo OR bar) baz) OR bat) NOT bass
+ //
+ // Doc 0 : (((-TRUE OR TRUE) TRUE) OR FALSE) NOT FALSE ->
+ //         ((FALSE OR TRUE) TRUE) TRUE -> ((TRUE) TRUE) TRUE -> TRUE
+ // Doc 1 : (((-TRUE OR FALSE) TRUE) OR FALSE) NOT FALSE ->
+ //         ((FALSE OR FALSE) TRUE) TRUE -> ((FALSE) TRUE) TRUE -> FALSE
+ // Doc 2 : (((-FALSE OR TRUE) TRUE) OR FALSE) NOT FALSE ->
+ //         ((TRUE OR TRUE) TRUE) TRUE -> ((TRUE) TRUE) TRUE -> TRUE
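+ //
+ // (Each rewrite above pushes a NOT inward via De Morgan's laws:
+ // NOT (a b) == (-a OR -b) and NOT (a OR b) == (-a -b).)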
+ std::string query = CreateQuery("NOT (-(NOT (foo -bar) baz) -bat) NOT bass");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("bar", "baz", "bat"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar", "baz", "bat"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, ImplicitAndTerms) {
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo bar");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, ExplicitAndTerms) {
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo AND bar");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, OrTerms) {
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("fo"));
+ ICING_ASSERT_OK(editor.BufferTerm("ba"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo OR bar");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, AndOrTermPrecedence) {
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Should be interpreted like `foo (bar OR baz)`
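+ // because OR binds more tightly than the implicit AND between terms.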
+ std::string query = CreateQuery("foo bar OR baz");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId1));
+
+ // Should be interpreted like `(bar OR baz) foo`
+ query = CreateQuery("bar OR baz foo");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId1));
+
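+ // Explicit grouping should produce the same interpretation and results.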
+ query = CreateQuery("(bar OR baz) foo");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar", "baz"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("baz"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Should be interpreted like `foo ((NOT bar) OR baz)`
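+ // since NOT binds only to the term immediately following it.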
+ std::string query = CreateQuery("foo NOT bar OR baz");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "baz"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId0));
+
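+ // With explicit grouping, NOT applies to the whole `(bar OR baz)` clause, so
+ // only doc 0 (which contains neither "bar" nor "baz") should match.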
+ query = CreateQuery("foo NOT (bar OR baz)");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
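+ // Restricting the query to "prop1" should exclude doc 2, whose only hit is
+ // in "prop2".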
+ std::string query = CreateQuery("foo", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_F(QueryVisitorTest, MultiPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+ SectionId prop3_section_id = 2;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop3_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
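+ // The createList restrict covers "prop1" and "prop2", so doc 2 (whose only
+ // hit is in "prop3") should be excluded.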
+ std::string query = R"(search("foo", createList("prop1", "prop2")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1", "prop2"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterStringIsInvalid) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // "prop1" is a STRING token, which cannot be a property name.
+ std::string query = CreateQuery(R"(("prop1":foo))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("PROP1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("PROP2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo", /*property_restrict=*/"PROP1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("PROP1"));
+ EXPECT_THAT(query_results.query_terms["PROP1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query =
+ CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("(prop1:foo)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+
+ query = CreateQuery("(prop1:(prop1:(prop1:(prop1:foo))))",
+ /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1));
+}
+
+TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("(prop2:foo)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+
+ // Resulting queries:
+ // - kPlain: `prop1:(prop2:(prop1:(prop2:(prop1:foo))))`
+ // - kSearch: `search("(prop2:(prop1:(prop2:(prop1:foo))))",
+ //   createList("prop1"))`
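+ // The effective restrict for the innermost terms is the intersection of all
+ // enclosing restricts, {prop1} ∩ {prop2} = {}, so no term can match and the
+ // results below are empty.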
+ query = CreateQuery("(prop2:(prop1:(prop2:(prop1:foo))))",
+ /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Resulting queries:
+ // - kPlain: `-prop1:(foo OR bar)`
+ // - kSearch: `-search("foo OR bar", createList("prop1"))`
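+ // Negating the restricted clause matches exactly the docs with no "foo" or
+ // "bar" hit in "prop1": here only doc2, whose "foo" is in "prop2".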
+ std::string query = absl_ports::StrCat(
+ "-", CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1"));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
+ // Resulting queries:
+ // - kPlain: `NOT prop1:(foo OR bar)`
+ // - kSearch: `NOT search("foo OR bar", createList("prop1"))`
+ query = absl_ports::StrCat(
+ "NOT ", CreateQuery("(foo OR bar)", /*property_restrict=*/"prop1"));
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), IsEmpty());
+ EXPECT_THAT(query_results.query_term_iterators, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ // Create documents as follows:
+ // Doc0:
+ // prop1: "bar"
+ // prop2: ""
+ // Doc1:
+ // prop1: "foo"
+ // prop2: ""
+ // Doc2:
+ // prop1: ""
+ // prop2: "foo"
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Resulting queries:
+ // - kPlain: `prop1:(-foo OR bar)`
+ // - kSearch: `search("-foo OR bar", createList("prop1"))`
+ //
+ // The query is equivalent to `-prop1:foo OR prop1:bar`, thus doc0 and doc2
+ // will be matched.
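+ // Per doc: doc0 matches via `prop1:bar`, doc2 matches via `-prop1:foo` (its
+ // "foo" is in prop2), and doc1, with "foo" in prop1, fails both clauses.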
+ std::string query =
+ CreateQuery("(-foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId0));
+
+ // Resulting queries:
+ // - kPlain: `prop1:(NOT foo OR bar)`
+ // - kSearch: `search("NOT foo OR bar", createList("prop1"))`
+ //
+ // The query is equivalent to `-prop1:foo OR prop1:bar`, thus doc0 and doc2
+ // will be matched.
+ query = CreateQuery("(NOT foo OR bar)", /*property_restrict=*/"prop1");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2, kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, SegmentationTest) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ // ICU segmentation will break this into "每天" and "上班".
+ // CFStringTokenizer (iOS) will break this into "每", "天" and "上班".
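+ // Only doc0, set up below, contains every query term across its properties,
+ // so it is the only expected match.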
+ std::string query = CreateQuery("每天上班");
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("上班"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(kDocumentId0, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ if (IsCfStringTokenization()) {
+ ICING_ASSERT_OK(editor.BufferTerm("每"));
+ ICING_ASSERT_OK(editor.BufferTerm("天"));
+ } else {
+ ICING_ASSERT_OK(editor.BufferTerm("每天"));
+ }
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("上班"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ if (IsCfStringTokenization()) {
+ ICING_ASSERT_OK(editor.BufferTerm("每"));
+ ICING_ASSERT_OK(editor.BufferTerm("天"));
+ } else {
+ ICING_ASSERT_OK(editor.BufferTerm("每天"));
+ }
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("每", "天", "上班"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("每", "天", "上班"));
+ } else {
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("每天", "上班"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("每天", "上班"));
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ NamespaceId ns_id = 0;
+
+ // Create the following docs:
+ // - Doc 0: Contains 'val0', 'val1', 'val2' in 'prop0'. Shouldn't match.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri1").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri2").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1', 'val2' in 'prop2'.
+ //   Should match.
+ doc = DocumentBuilder(doc).SetUri("uri3").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
+ editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
+ // Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri4").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
+ editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid4, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Now issue a query with 'val1' restricted to 'prop1'. This should match only
+ // docs 1 and 3.
+ // Resulting queries:
+ // - kPlain: `val0 prop1:val1 val2`
+ // - kSearch: `val0 search("val1", createList("prop1")) val2`
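+ // The restrict only applies within its clause, so "val0" and "val2" remain
+ // unrestricted (keyed under "" in query_terms) while "val1" is keyed under
+ // "prop1"; i.e. the restrict pops correctly once its clause closes.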
+ std::string query = absl_ports::StrCat(
+ "val0 ", CreateQuery("val1", /*property_restrict=*/"prop1"), " val2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ if (GetParam() == QueryType::kSearch) {
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ } else {
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ }
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("", "prop1"));
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("val0", "val2"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("val1"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("val0", "val1", "val2"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid3, docid1));
+}
+
+TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ NamespaceId ns_id = 0;
+
+ // Create the following docs:
+ // - Doc 0: Contains 'val0', 'val1', 'val2' in 'prop0'. Should match.
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 1: Contains 'val0', 'val1', 'val2' in 'prop1'. Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri1").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid1, document_store_->Put(doc));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 2: Contains 'val0', 'val1', 'val2' in 'prop2'. Should match.
+ doc = DocumentBuilder(doc).SetUri("uri2").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid2, document_store_->Put(doc));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 3: Contains 'val0' in 'prop0', 'val1' in 'prop1', 'val2' in 'prop2'.
+ //   Should match.
+ doc = DocumentBuilder(doc).SetUri("uri3").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid3, document_store_->Put(doc));
+ editor = index_->Edit(docid3, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid3, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid3, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // - Doc 4: Contains 'val1' in 'prop0', 'val2' in 'prop1', 'val0' in 'prop2'.
+ // Shouldn't match.
+ doc = DocumentBuilder(doc).SetUri("uri4").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid4, document_store_->Put(doc));
+ editor = index_->Edit(docid4, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val1"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid4, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val2"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+ editor = index_->Edit(docid4, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("val0"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Now issue a query with 'val1' restricted to 'prop2' nested inside a
+ // 'prop1' restrict. The nested restrict is unsatisfiable, so only the
+ // `prop0:val0` and `prop2:val2` clauses can produce matches: docs 0, 2 and 3.
+ // Resulting queries:
+ // - kPlain: `prop0:val0 OR prop1:(prop2:val1) OR prop2:val2`
+ // - kSearch: `prop0:val0 OR prop1:(search("val1", createList("prop2"))) OR
+ //   prop2:val2`
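+ // Per doc: doc0 matches via `prop0:val0`, doc2 via `prop2:val2`, doc3 via
+ // both; doc1 and doc4 satisfy neither outer clause.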
+ std::string query = absl_ports::StrCat(
+ "prop0:val0 OR prop1:(",
+ CreateQuery("val1", /*property_restrict=*/"prop2"), ") OR prop2:val2");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop2"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("val0"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("val2"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("val0", "val2"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid3, docid2, docid0));
+}
+
+TEST_F(QueryVisitorTest, UnsupportedFunctionReturnsInvalidArgument) {
+ std::string query = "unsupportedFunction()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionTooFewArgumentsReturnsInvalidArgument) {
+ std::string query = "search()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionTooManyArgumentsReturnsInvalidArgument) {
+ std::string query = R"(search("foo", createList("subject"), "bar"))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionWrongFirstArgumentTypeReturnsInvalidArgument) {
+ // First argument type=TEXT, expected STRING.
+ std::string query = "search(7)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // First argument type=string list, expected STRING.
+ query = R"(search(createList("subject")))";
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ EXPECT_THAT(std::move(query_visitor_two).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionWrongSecondArgumentTypeReturnsInvalidArgument) {
+ // Second argument type=STRING, expected string list.
+ std::string query = R"(search("foo", "bar"))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Second argument type=TEXT, expected string list.
+ query = R"(search("foo", 7))";
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ EXPECT_THAT(std::move(query_visitor_two).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ SearchFunctionCreateListZeroPropertiesReturnsInvalidArgument) {
+ std::string query = R"(search("foo", createList()))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Nested `search` calls are allowed, and their property restrict lists
+ // compose: a term only matches if it satisfies every enclosing restrict.
+ // Both levels here restrict to "prop1", so the query behaves like
+ // `prop1:foo prop1:bar` and only doc2, with both terms in "prop1", matches.
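+ // Assuming EscapeString simply backslash-escapes the embedded quotes, the
+ // constructed level-two query looks like:
+ //   search("search(\"foo\", createList(\"prop1\")) bar", createList("prop1"))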
+ std::string level_one_query = R"(search("foo", createList("prop1")) bar)";
+ std::string level_two_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+
+ std::string level_four_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_three_query),
+ R"(", createList("prop1")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_four_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_four_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop1"));
+ EXPECT_THAT(query_results.query_terms["prop1"],
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo", "bar"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId2));
+}
+
+// This test will nest `search` calls together with the set of restricts
+// narrowing at each level so that the set of docs matching the query shrinks.
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ SectionId prop3_id = 3;
+ SectionId prop4_id = 4;
+ SectionId prop5_id = 5;
+ SectionId prop6_id = 6;
+ SectionId prop7_id = 7;
+
+ NamespaceId ns_id = 0;
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid1,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid2,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid3,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid4,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid5,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid6,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid7,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Nested `search` calls compose their property restrict lists by
+ // intersection, so each outer level can only narrow the effective set of
+ // searchable properties.
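+ // Effective restrict sets, level by level (outer ∩ inner):
+ //   level 1: all eight props               -> docs 0-7 match
+ //   level 2: {prop6, prop0, prop4, prop2}  -> docs 0, 2, 4, 6 match
+ //   level 3: {prop0, prop6}                -> docs 0, 6 match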
+ std::string level_one_query =
+ R"(search("foo", createList("prop2", "prop5", "prop1", "prop3", "prop0", "prop6", "prop4", "prop7")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_one_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_one_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop1", "prop2", "prop3", "prop4",
+ "prop5", "prop6", "prop7"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop3"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop4"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop5"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop7"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid7, docid6, docid5, docid4, docid3, docid2,
+ docid1, docid0));
+
+ std::string level_two_query = absl_ports::StrCat(
+ R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop6", "prop0", "prop4", "prop2")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop2", "prop4", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop2"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop4"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid4, docid2, docid0));
+
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop0", "prop6")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+}
+
+ // This test will nest `search` calls together with the set of restricts
+ // expanding at each level. The set of docs matching the query does not grow,
+ // because the innermost (narrowest) restrict set still applies.
+TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) {
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("prop0")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(prop)
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Section ids are assigned alphabetically.
+ SectionId prop0_id = 0;
+ SectionId prop1_id = 1;
+ SectionId prop2_id = 2;
+ SectionId prop3_id = 3;
+ SectionId prop4_id = 4;
+ SectionId prop5_id = 5;
+ SectionId prop6_id = 6;
+ SectionId prop7_id = 7;
+
+ NamespaceId ns_id = 0;
+ DocumentProto doc =
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId docid0, document_store_->Put(doc));
+ Index::Editor editor =
+ index_->Edit(docid0, prop0_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid1,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri1").Build()));
+ editor = index_->Edit(docid1, prop1_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid2,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri2").Build()));
+ editor = index_->Edit(docid2, prop2_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid3,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri3").Build()));
+ editor = index_->Edit(docid3, prop3_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid4,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri4").Build()));
+ editor = index_->Edit(docid4, prop4_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid5,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri5").Build()));
+ editor = index_->Edit(docid5, prop5_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid6,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri6").Build()));
+ editor = index_->Edit(docid6, prop6_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId docid7,
+ document_store_->Put(DocumentBuilder(doc).SetUri("uri7").Build()));
+ editor = index_->Edit(docid7, prop7_id, TERM_MATCH_PREFIX, ns_id);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+  // Level one: restrict "foo" to prop0 and prop6. Only docid0 (hit in prop0)
+  // and docid6 (hit in prop6) should match.
+ std::string level_one_query =
+ R"(search("foo", createList("prop0", "prop6")))";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(level_one_query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_one_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+
+ std::string level_two_query = absl_ports::StrCat(
+ R"(search(")", EscapeString(level_one_query),
+ R"(", createList("prop6", "prop0", "prop4", "prop2")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_two_query));
+ QueryVisitor query_visitor_two(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), level_two_query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_two);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_two).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+
+ std::string level_three_query =
+ absl_ports::StrCat(R"(search(")", EscapeString(level_two_query),
+ R"(", createList("prop2", "prop5", "prop1", "prop3",)",
+ R"( "prop0", "prop6", "prop4", "prop7")))");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(level_three_query));
+ QueryVisitor query_visitor_three(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ level_three_query, DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor_three);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor_three).ConsumeResults());
+
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("prop0", "prop6"));
+ EXPECT_THAT(query_results.query_terms["prop0"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.query_terms["prop6"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(docid6, docid0));
+}
+
+TEST_F(QueryVisitorTest,
+ PropertyDefinedFunctionWithNoArgumentReturnsInvalidArgument) {
+ std::string query = "propertyDefined()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(
+ QueryVisitorTest,
+ PropertyDefinedFunctionWithMoreThanOneTextArgumentReturnsInvalidArgument) {
+ std::string query = "propertyDefined(\"foo\", \"bar\")";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ PropertyDefinedFunctionWithTextArgumentReturnsInvalidArgument) {
+ // The argument type is TEXT, not STRING here.
+ std::string query = "propertyDefined(foo)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ PropertyDefinedFunctionWithNonTextArgumentReturnsInvalidArgument) {
+ std::string query = "propertyDefined(1 < 2)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, PropertyDefinedFunctionReturnsMatchingDocuments) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 2 has the term "bar" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("typeWithUrl").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo propertyDefined(\"url\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest,
+ PropertyDefinedFunctionReturnsNothingIfNoMatchingProperties) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Attempt to query a non-existent property.
+ std::string query = CreateQuery("propertyDefined(\"nonexistentproperty\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryVisitorTest,
+ PropertyDefinedFunctionWithNegationMatchesDocsWithNoSuchProperty) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ std::string query = CreateQuery("foo AND NOT propertyDefined(\"url\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+      /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId1));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithNoArgumentReturnsInvalidArgument) {
+ std::string query = "hasProperty()";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithMoreThanOneStringArgumentReturnsInvalidArgument) {
+ std::string query = "hasProperty(\"foo\", \"bar\")";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithTextArgumentReturnsInvalidArgument) {
+ // The argument type is TEXT, not STRING here.
+ std::string query = "hasProperty(foo)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest,
+ HasPropertyFunctionWithNonStringArgumentReturnsInvalidArgument) {
+ std::string query = "hasProperty(1 < 2)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryVisitorTest, HasPropertyFunctionReturnsMatchingDocuments) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Simple")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and has the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("Simple").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId0,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, "price").c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and doesn't have the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("Simple").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId0, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 2 has the term "bar" and has the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("Simple").Build()));
+ editor = index_->Edit(kDocumentId2, kSectionId0, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("bar"));
+ ICING_ASSERT_OK(editor.BufferTerm(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, "price").c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Test that `foo hasProperty("price")` matches document 0 only.
+ std::string query = CreateQuery("foo hasProperty(\"price\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor1(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor1);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor1).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kHasPropertyFunctionFeature,
+ kListFilterQueryLanguageFeature));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId0));
+
+ // Test that `bar OR NOT hasProperty("price")` matches document 1 and
+ // document 2.
+ query = CreateQuery("bar OR NOT hasProperty(\"price\")");
+ ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
+ QueryVisitor query_visitor2(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor2);
+ ICING_ASSERT_OK_AND_ASSIGN(query_results,
+ std::move(query_visitor2).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kHasPropertyFunctionFeature,
+ kListFilterQueryLanguageFeature));
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId1, kDocumentId2));
+}
+
+TEST_P(QueryVisitorTest,
+ HasPropertyFunctionReturnsNothingIfNoMatchingProperties) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Simple")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and has the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("Simple").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId0,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.BufferTerm(
+ absl_ports::StrCat(kPropertyExistenceTokenPrefix, "price").c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Document 1 has the term "foo" and doesn't have the "price" property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("Simple").Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId0, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ ICING_ASSERT_OK(editor.BufferTerm("foo"));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
+
+ // Attempt to query a non-existent property.
+ std::string query = CreateQuery("hasProperty(\"nonexistentproperty\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info=*/true, clock_.GetSystemTimeMilliseconds());
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kHasPropertyFunctionFeature,
+ kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest,
+ testing::Values(QueryType::kPlain,
+ QueryType::kSearch));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/advanced_query_parser/util/string-util.cc b/icing/query/advanced_query_parser/util/string-util.cc
new file mode 100644
index 0000000..9af2ed6
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util.cc
@@ -0,0 +1,106 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/util/string-util.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+namespace string_util {
+
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
+ std::string_view value) {
+ std::string result;
+ bool in_escape = false;
+ for (char c : value) {
+ if (in_escape) {
+ in_escape = false;
+ } else if (c == '\\') {
+ in_escape = true;
+ continue;
+ } else if (c == '"') {
+ return absl_ports::InvalidArgumentError(
+ "Encountered an unescaped quotation mark!");
+ }
+ result += c;
+ }
+ return result;
+}
+
+libtextclassifier3::StatusOr<std::string_view> FindEscapedToken(
+ std::string_view escaped_string, std::string_view unescaped_token) {
+ if (unescaped_token.empty()) {
+    return absl_ports::InvalidArgumentError(
+        "Cannot find the escaped form of an empty unescaped token.");
+ }
+
+ // Find the start of unescaped_token within the escaped_string
+ const char* esc_string_end = escaped_string.data() + escaped_string.length();
+ size_t pos = escaped_string.find(unescaped_token[0]);
+ const char* esc_token_start = (pos == std::string_view::npos)
+ ? esc_string_end
+ : escaped_string.data() + pos;
+ const char* esc_token_cur = esc_token_start;
+ const char* possible_next_start = nullptr;
+ bool is_escaped = false;
+ int i = 0;
+ for (; i < unescaped_token.length() && esc_token_cur < esc_string_end;
+ ++esc_token_cur) {
+ if (esc_token_cur != esc_token_start &&
+ *esc_token_cur == unescaped_token[0] &&
+ possible_next_start == nullptr) {
+ possible_next_start = esc_token_cur;
+ }
+
+    // Every char in the escaped region should either be an escape or match
+    // the next char in unescaped_token.
+ if (!is_escaped && *esc_token_cur == '\\') {
+ is_escaped = true;
+ } else if (*esc_token_cur == unescaped_token[i]) {
+ is_escaped = false;
+ ++i;
+ } else {
+ // No match. If we don't have a possible_next_start, then try to find one.
+ if (possible_next_start == nullptr) {
+ pos = escaped_string.find(unescaped_token[0],
+ esc_token_cur - escaped_string.data());
+ if (pos == std::string_view::npos) {
+ break;
+ }
+ esc_token_start = escaped_string.data() + pos;
+ } else {
+ esc_token_start = possible_next_start;
+ possible_next_start = nullptr;
+ }
+      // esc_token_start has been reset to a char that equals
+      // unescaped_token[0]. The loop increment will advance esc_token_cur past
+      // it, so set i to 1.
+ i = 1;
+ esc_token_cur = esc_token_start;
+ }
+ }
+ if (i != unescaped_token.length()) {
+ return absl_ports::InvalidArgumentError(
+        absl_ports::StrCat("Couldn't match chars at token=", unescaped_token,
+                           " and raw_text=", escaped_string));
+ }
+ return std::string_view(esc_token_start, esc_token_cur - esc_token_start);
+}
+
+} // namespace string_util
+
+} // namespace lib
+} // namespace icing
\ No newline at end of file
diff --git a/icing/query/advanced_query_parser/util/string-util.h b/icing/query/advanced_query_parser/util/string-util.h
new file mode 100644
index 0000000..09fb451
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_ADVANCED_QUERY_PARSER__STRING_UTIL_H_
+#define ICING_QUERY_ADVANCED_QUERY_PARSER__STRING_UTIL_H_
+
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+namespace string_util {
+
+// Returns:
+// - On success, value with the escapes removed.
+// - INVALID_ARGUMENT if a non-escaped quote is encountered.
+// Ex. "fo\\\\o" -> "fo\\o"
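+//     "foo\\"    -> "foo" (a trailing, unpaired escape is simply dropped)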
+libtextclassifier3::StatusOr<std::string> UnescapeStringValue(
+ std::string_view value);
+
+// Returns:
+// - On success, string_view pointing to the segment of escaped_string that,
+// if unescaped, would match unescaped_token.
+// - INVALID_ARGUMENT if unescaped_token is empty or no segment of
+//   escaped_string unescapes to it.
+// Ex. escaped_string="foo b\\a\\\"r baz", unescaped_token="ba\"r"
+// returns "b\\a\\\"r"
+libtextclassifier3::StatusOr<std::string_view> FindEscapedToken(
+ std::string_view escaped_string, std::string_view unescaped_token);
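+
+// A minimal usage sketch for walking successive matches, mirroring
+// FindEscapedTokenTraversesThroughEscapedText in string-util_test.cc:
+//
+//   ICING_ASSIGN_OR_RETURN(std::string_view match,
+//                          string_util::FindEscapedToken(escaped, token));
+//   const char* match_end = match.data() + match.length();
+//   escaped = escaped.substr(match_end - escaped.data());
+//   // Repeat; FindEscapedToken returns INVALID_ARGUMENT once no match
+//   // remains.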
+
+} // namespace string_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_ADVANCED_QUERY_PARSER__STRING_UTIL_H_
diff --git a/icing/query/advanced_query_parser/util/string-util_test.cc b/icing/query/advanced_query_parser/util/string-util_test.cc
new file mode 100644
index 0000000..a7ccf3e
--- /dev/null
+++ b/icing/query/advanced_query_parser/util/string-util_test.cc
@@ -0,0 +1,125 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/advanced_query_parser/util/string-util.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+TEST(StringUtilTest, UnescapeStringEmptyString) {
+ EXPECT_THAT(string_util::UnescapeStringValue(""), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(StringUtilTest, UnescapeStringStringWithNoEscapes) {
+ EXPECT_THAT(string_util::UnescapeStringValue("foo"), IsOkAndHolds("foo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f o o"), IsOkAndHolds("f o o"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\to\to"),
+ IsOkAndHolds("f\to\to"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f.o.o"), IsOkAndHolds("f.o.o"));
+}
+
+TEST(StringUtilTest, UnescapeStringStringWithEscapes) {
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\oo"), IsOkAndHolds("foo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\\\oo"),
+ IsOkAndHolds("f\\oo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\\"oo"),
+ IsOkAndHolds("f\"oo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("foo\\"), IsOkAndHolds("foo"));
+ EXPECT_THAT(string_util::UnescapeStringValue("foo b\\a\\\"r baz"),
+ IsOkAndHolds("foo ba\"r baz"));
+ EXPECT_THAT(string_util::UnescapeStringValue("bar b\\aar bar\\s bart"),
+ IsOkAndHolds("bar baar bars bart"));
+ EXPECT_THAT(string_util::UnescapeStringValue("\\\\\\\\a"),
+ IsOkAndHolds("\\\\a"));
+}
+
+TEST(StringUtilTest, UnescapeStringQuoteWithoutEscape) {
+ EXPECT_THAT(string_util::UnescapeStringValue("f\\o\"o"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::UnescapeStringValue("f\"oo"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(StringUtilTest, FindEscapedTokenEmptyUnescapedToken) {
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", ""),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(StringUtilTest, FindEscapedTokenTokenNotPresent) {
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "elephant"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "bat"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "taz"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "bazz"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(StringUtilTest, FindEscapedTokenMatchInMiddleToken) {
+ EXPECT_THAT(string_util::FindEscapedToken("babar", "bar"),
+ IsOkAndHolds("bar"));
+}
+
+TEST(StringUtilTest, FindEscapedTokenMatches) {
+ EXPECT_THAT(string_util::FindEscapedToken("foo b\\a\\\"r baz", "ba\"r"),
+ IsOkAndHolds("b\\a\\\"r"));
+ EXPECT_THAT(string_util::FindEscapedToken("\\\\\\\\a", "\\\\a"),
+ IsOkAndHolds("\\\\\\\\a"));
+}
+
+TEST(StringUtilTest, FindEscapedTokenTraversesThroughEscapedText) {
+ std::string_view escaped_text = "bar b\\aar bar\\s bart";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::string_view result,
+ string_util::FindEscapedToken(escaped_text, "bar"));
+ // escaped_text = "bar b\\aar bar\\s bart";
+ // escaped_token ^ ^
+ EXPECT_THAT(result, Eq("bar"));
+
+ // escaped_text = "b\\aar bar\\s bart";
+ // escaped_token ^ ^
+ const char* result_end = result.data() + result.length();
+ escaped_text = escaped_text.substr(result_end - escaped_text.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ result, string_util::FindEscapedToken(escaped_text, "bar"));
+ EXPECT_THAT(result, Eq("bar"));
+
+ // escaped_text = "\\s bart";
+ // escaped_token ^ ^
+ result_end = result.data() + result.length();
+ escaped_text = escaped_text.substr(result_end - escaped_text.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ result, string_util::FindEscapedToken(escaped_text, "bar"));
+ EXPECT_THAT(result, Eq("bar"));
+
+ result_end = result.data() + result.length();
+ escaped_text = escaped_text.substr(result_end - escaped_text.data());
+ EXPECT_THAT(string_util::FindEscapedToken(escaped_text, "bar"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
\ No newline at end of file
diff --git a/icing/query/query-features.h b/icing/query/query-features.h
new file mode 100644
index 0000000..d829cd7
--- /dev/null
+++ b/icing/query/query-features.h
@@ -0,0 +1,63 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_QUERY_FEATURES_H_
+#define ICING_QUERY_QUERY_FEATURES_H_
+
+#include <string_view>
+#include <unordered_set>
+
+namespace icing {
+namespace lib {
+
+// A feature used in a query.
+// All feature values here must be kept in sync with their counterparts in:
+// androidx-main/frameworks/support/appsearch/appsearch/src/main/java/androidx/appsearch/app/Features.java
+using Feature = std::string_view;
+
+// This feature relates to the use of the numeric comparison operators in the
+// advanced query language. Ex. `price < 10`.
+constexpr Feature kNumericSearchFeature =
+ "NUMERIC_SEARCH"; // Features#NUMERIC_SEARCH
+
+// This feature relates to the use of the STRING terminal in the advanced query
+// language. Ex. `"foo?bar"` is treated as a single term - `foo?bar`.
+constexpr Feature kVerbatimSearchFeature =
+ "VERBATIM_SEARCH"; // Features#VERBATIM_SEARCH
+
+// This feature covers all additions (other than numeric search and verbatim
+// search) to the query language to bring it into better alignment with the list
+// filters spec.
+// This includes:
+// - support for function calls
+// - expanding support for negation and property restriction expressions
+// - prefix operator '*'
+// - 'NOT' operator
+// - propertyDefined("url")
+constexpr Feature kListFilterQueryLanguageFeature =
+ "LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE
+
+// This feature relates to the use of the "hasProperty(property_path)" function.
+constexpr Feature kHasPropertyFunctionFeature =
+ "HAS_PROPERTY_FUNCTION"; // Features#HAS_PROPERTY_FUNCTION
+
+inline std::unordered_set<Feature> GetQueryFeaturesSet() {
+ return {kNumericSearchFeature, kVerbatimSearchFeature,
+ kListFilterQueryLanguageFeature, kHasPropertyFunctionFeature};
+}
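+
+// A minimal sketch of gating features, assuming a SearchSpecProto that lists
+// its enabled features (mirroring the check in QueryProcessor::ParseSearch):
+//
+//   std::unordered_set<Feature> enabled(spec.enabled_features().begin(),
+//                                       spec.enabled_features().end());
+//   if (enabled.find(kNumericSearchFeature) == enabled.end()) {
+//     // Reject the query: it uses a feature the caller didn't enable.
+//   }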
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_QUERY_FEATURES_H_
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 4d714f8..bbfbf3c 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -18,8 +18,8 @@
#include <memory>
#include <stack>
#include <string>
-#include <string_view>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -35,7 +35,15 @@
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/search.pb.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/query/advanced_query_parser/query-visitor.h"
+#include "icing/query/query-features.h"
+#include "icing/query/query-processor.h"
+#include "icing/query/query-results.h"
#include "icing/query/query-terms.h"
+#include "icing/query/query-utils.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -46,7 +54,6 @@
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
-#include "icing/util/clock.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -70,7 +77,7 @@ struct ParserStateFrame {
// If the last independent token was a property/section filter, then we need
// to save the section name so we can create a section filter iterator.
- std::string_view section_restrict = "";
+ std::string section_restrict;
};
// Combines any OR and AND iterators together into one iterator.
@@ -101,63 +108,124 @@ std::unique_ptr<DocHitInfoIterator> ProcessParserStateFrame(
} // namespace
libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>>
-QueryProcessor::Create(Index* index,
+QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store, const Clock* clock) {
+ const SchemaStore* schema_store) {
ICING_RETURN_ERROR_IF_NULL(index);
+ ICING_RETURN_ERROR_IF_NULL(numeric_index);
ICING_RETURN_ERROR_IF_NULL(language_segmenter);
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
- ICING_RETURN_ERROR_IF_NULL(clock);
return std::unique_ptr<QueryProcessor>(
- new QueryProcessor(index, language_segmenter, normalizer, document_store,
- schema_store, clock));
+ new QueryProcessor(index, numeric_index, language_segmenter, normalizer,
+ document_store, schema_store));
}
QueryProcessor::QueryProcessor(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store,
- const Clock* clock)
+ const SchemaStore* schema_store)
: index_(*index),
+ numeric_index_(*numeric_index),
language_segmenter_(*language_segmenter),
normalizer_(*normalizer),
document_store_(*document_store),
- schema_store_(*schema_store),
- clock_(*clock) {}
-
-libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
-QueryProcessor::ParseSearch(const SearchSpecProto& search_spec) {
- ICING_ASSIGN_OR_RETURN(QueryResults results, ParseRawQuery(search_spec));
-
- DocHitInfoIteratorFilter::Options options;
-
- if (search_spec.namespace_filters_size() > 0) {
- options.namespaces =
- std::vector<std::string_view>(search_spec.namespace_filters().begin(),
- search_spec.namespace_filters().end());
+ schema_store_(*schema_store) {}
+
+libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) {
+ if (search_spec.search_type() == SearchSpecProto::SearchType::UNDEFINED) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Search type ",
+ SearchSpecProto::SearchType::Code_Name(search_spec.search_type()),
+ " is not supported."));
+ }
+ QueryResults results;
+ if (search_spec.search_type() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ ICING_VLOG(1) << "Using EXPERIMENTAL_ICING_ADVANCED_QUERY parser!";
+ ICING_ASSIGN_OR_RETURN(
+ results,
+ ParseAdvancedQuery(search_spec, ranking_strategy, current_time_ms));
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ results, ParseRawQuery(search_spec, ranking_strategy, current_time_ms));
}
- if (search_spec.schema_type_filters_size() > 0) {
- options.schema_types =
- std::vector<std::string_view>(search_spec.schema_type_filters().begin(),
- search_spec.schema_type_filters().end());
+ // Check that all new features used in the search have been enabled in the
+ // SearchSpec.
+ const std::unordered_set<Feature> enabled_features(
+ search_spec.enabled_features().begin(),
+ search_spec.enabled_features().end());
+ for (const Feature feature : results.features_in_use) {
+ if (enabled_features.find(feature) == enabled_features.end()) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Attempted use of unenabled feature ", feature));
+ }
}
+ DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
std::move(results.root_iterator), &document_store_, &schema_store_,
- &clock_, options);
+ options, current_time_ms);
+ if (!search_spec.type_property_filters().empty()) {
+ results.root_iterator =
+ DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
+ std::move(results.root_iterator), &document_store_, &schema_store_,
+ search_spec, current_time_ms);
+ }
return results;
}
+libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) const {
+ QueryResults results;
+ Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeQuery());
+
+ if (tree_root == nullptr) {
+ results.root_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ document_store_.last_added_document_id());
+ return results;
+ }
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
+ DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
+ bool needs_term_frequency_info =
+ ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
+ QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
+ &schema_store_, &normalizer_,
+ plain_tokenizer.get(), search_spec.query(),
+ std::move(options), search_spec.term_match_type(),
+ needs_term_frequency_info, current_time_ms);
+ tree_root->Accept(&query_visitor);
+ return std::move(query_visitor).ConsumeResults();
+}
+
// TODO(cassiewang): Collect query stats to populate the SearchResultsProto
-libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
-QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
+libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) {
+ DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
+
// Tokenize the incoming raw query
//
// TODO(cassiewang): Consider caching/creating a tokenizer factory that will
@@ -173,14 +241,13 @@ QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
std::stack<ParserStateFrame> frames;
frames.emplace();
-
QueryResults results;
// Process all the tokens
for (int i = 0; i < tokens.size(); i++) {
const Token& token = tokens.at(i);
std::unique_ptr<DocHitInfoIterator> result_iterator;
- // TODO(cassiewang): Handle negation tokens
+ // TODO(b/202076890): Handle negation tokens
switch (token.type) {
case Token::Type::QUERY_LEFT_PARENTHESES: {
frames.emplace(ParserStateFrame());
@@ -218,7 +285,7 @@ QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
"Encountered empty stack of ParserStateFrames");
}
- frames.top().section_restrict = token.text;
+ frames.top().section_restrict = std::string(token.text);
break;
}
case Token::Type::REGULAR: {
@@ -252,18 +319,38 @@ QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
// We do the same amount of disk reads, so it may be dependent on how
// big the schema is and/or how popular schema type filtering and
// section filtering is.
-
ICING_ASSIGN_OR_RETURN(
result_iterator,
- index_.GetIterator(normalized_text, kSectionIdMaskAll,
- search_spec.term_match_type()));
-
- // Add terms to match if this is not a negation term.
+ index_.GetIterator(
+ normalized_text,
+ token.text.data() - search_spec.query().c_str(),
+ token.text.length(), kSectionIdMaskAll,
+ search_spec.term_match_type(),
+ /*need_hit_term_frequency=*/ranking_strategy ==
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+
+ // Add term iterator and terms to match if this is not a negation term.
// WARNING: setting query terms at this point is not compatible with
// group-level excludes, group-level sections restricts or excluded
// section restricts. Those are not currently supported. If they became
// supported, this handling for query terms would need to be altered.
if (!frames.top().saw_exclude) {
+ if (ranking_strategy ==
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> term_iterator,
+ index_.GetIterator(
+ normalized_text,
+ token.text.data() - search_spec.query().c_str(),
+ token.text.length(), kSectionIdMaskAll,
+ search_spec.term_match_type(),
+ /*need_hit_term_frequency=*/ranking_strategy ==
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE));
+ results.query_term_iterators[normalized_text] =
+ std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(term_iterator), &document_store_, &schema_store_,
+ options, current_time_ms);
+ }
results.query_terms[frames.top().section_restrict].insert(
std::move(normalized_text));
}
@@ -316,9 +403,11 @@ QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
if (!frames.top().section_restrict.empty()) {
// We saw a section restrict earlier, wrap the result iterator in
// the section restrict
- result_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::set<std::string> section_restricts;
+ section_restricts.insert(std::move(frames.top().section_restrict));
+ result_iterator = DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
std::move(result_iterator), &document_store_, &schema_store_,
- frames.top().section_restrict);
+ std::move(section_restricts), current_time_ms);
frames.top().section_restrict = "";
}
diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h
index fa98627..d4c22dd 100644
--- a/icing/query/query-processor.h
+++ b/icing/query/query-processor.h
@@ -15,18 +15,21 @@
#ifndef ICING_QUERY_QUERY_PROCESSOR_H_
#define ICING_QUERY_QUERY_PROCESSOR_H_
+#include <cstdint>
#include <memory>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/numeric-index.h"
#include "icing/proto/search.pb.h"
+#include "icing/query/query-results.h"
#include "icing/query/query-terms.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -44,19 +47,18 @@ class QueryProcessor {
// An QueryProcessor on success
// FAILED_PRECONDITION if any of the pointers is null.
static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create(
- Index* index, const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer, const DocumentStore* document_store,
- const SchemaStore* schema_store, const Clock* clock);
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ const DocumentStore* document_store, const SchemaStore* schema_store);
- struct QueryResults {
- std::unique_ptr<DocHitInfoIterator> root_iterator;
- // A map from section names to sets of terms restricted to those sections.
- // Query terms that are not restricted are found at the entry with key "".
- SectionRestrictQueryTermsMap query_terms;
- };
// Parse the search configurations (including the query, any additional
// filters, etc.) in the SearchSpecProto into one DocHitInfoIterator.
//
+ // When ranking_strategy == RELEVANCE_SCORE, the root_iterator and the
+ // query_term_iterators returned will keep term frequency information
+ // internally, so that term frequency stats will be collected when calling
+  // PopulateMatchedTermsStats on the iterators.
+ //
// Returns:
// On success,
// - One iterator that represents the entire query
@@ -64,14 +66,29 @@ class QueryProcessor {
// INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
// INTERNAL_ERROR on all other errors
libtextclassifier3::StatusOr<QueryResults> ParseSearch(
- const SearchSpecProto& search_spec);
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms);
private:
explicit QueryProcessor(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store, const Clock* clock);
+ const SchemaStore* schema_store);
+
+  // Parse the query into one DocHitInfoIterator that represents the root of a
+ // query tree in our new Advanced Query Language.
+ //
+ // Returns:
+ // On success,
+ // - One iterator that represents the entire query
+ // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
+ libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery(
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms) const;
  // Parse the query into one DocHitInfoIterator that represents the root of a
// query tree.
@@ -83,16 +100,18 @@ class QueryProcessor {
// INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
// INTERNAL_ERROR on all other errors
libtextclassifier3::StatusOr<QueryResults> ParseRawQuery(
- const SearchSpecProto& search_spec);
+ const SearchSpecProto& search_spec,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ int64_t current_time_ms);
// Not const because we could modify/sort the hit buffer in the lite index at
// query time.
Index& index_;
+ const NumericIndex<int64_t>& numeric_index_;
const LanguageSegmenter& language_segmenter_;
const Normalizer& normalizer_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
- const Clock& clock_;
};
} // namespace lib
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 000bf3a..025e8e6 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -16,27 +16,32 @@
#include "gmock/gmock.h"
#include "third_party/absl/flags/flag.h"
#include "icing/document-builder.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/index/index.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-processor.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/transform/normalizer-factory.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "unicode/uloc.h"
// Run on a Linux workstation:
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing/query:query-processor_benchmark
//
// $ blaze-bin/icing/query/query-processor_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
@@ -52,8 +57,8 @@
// $ adb push blaze-bin/icing/query/query-processor_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all
-// --adb
+// $ adb shell /data/local/tmp/query-processor_benchmark
+// --benchmark_filter=all --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb;
// the benchmark will set up data files accordingly.
@@ -69,13 +74,17 @@ void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
const std::string& token) {
Index::Editor editor =
index->Edit(document_id, section_id, term_match_type, /*namespace_id=*/0);
- ICING_ASSERT_OK(editor.AddHit(token.c_str()));
+ ICING_ASSERT_OK(editor.BufferTerm(token.c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
}
-std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
+ const Filesystem& filesystem,
const std::string& index_dir) {
- Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
- return Index::Create(options, &filesystem).ValueOrDie();
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
std::unique_ptr<Normalizer> CreateNormalizer() {
@@ -85,6 +94,18 @@ std::unique_ptr<Normalizer> CreateNormalizer() {
.ValueOrDie();
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_QueryOneTerm(benchmark::State& state) {
bool run_via_adb = absl::GetFlag(FLAGS_adb);
if (!run_via_adb) {
@@ -96,6 +117,7 @@ void BM_QueryOneTerm(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -106,23 +128,35 @@ void BM_QueryOneTerm(benchmark::State& state) {
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ // TODO(b/249829533): switch to use persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
-
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
- schema_store.get())
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ DocumentStore::CreateResult create_result =
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -137,17 +171,21 @@ void BM_QueryOneTerm(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index.get(), language_segmenter.get(),
- normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ QueryProcessor::Create(index.get(), numeric_index.get(),
+ language_segmenter.get(), normalizer.get(),
+ document_store.get(), schema_store.get()));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
for (auto _ : state) {
- QueryProcessor::QueryResults results =
- query_processor->ParseSearch(search_spec).ValueOrDie();
+ QueryResults results =
+ query_processor
+ ->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
+ .ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
}
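
ParseSearch now takes the ranking strategy and the current time in milliseconds in addition to the SearchSpecProto; judging by the test expectations later in this patch, the ranking strategy controls whether results.query_term_iterators is populated (RELEVANCE_SCORE) or left empty (NONE). A minimal sketch of the new call shape, assuming a configured QueryProcessor and a Clock:

ICING_ASSERT_OK_AND_ASSIGN(
    QueryResults results,
    query_processor->ParseSearch(
        search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
        clock.GetSystemTimeMilliseconds()));
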
@@ -208,6 +246,7 @@ void BM_QueryFiveTerms(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -218,23 +257,35 @@ void BM_QueryFiveTerms(benchmark::State& state) {
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ // TODO(b/249829533): switch to using the persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
-
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
- schema_store.get())
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ DocumentStore::CreateResult create_result =
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -263,9 +314,9 @@ void BM_QueryFiveTerms(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index.get(), language_segmenter.get(),
- normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ QueryProcessor::Create(index.get(), numeric_index.get(),
+ language_segmenter.get(), normalizer.get(),
+ document_store.get(), schema_store.get()));
const std::string query_string = absl_ports::StrCat(
input_string_a, " ", input_string_b, " ", input_string_c, " ",
@@ -276,8 +327,12 @@ void BM_QueryFiveTerms(benchmark::State& state) {
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
for (auto _ : state) {
- QueryProcessor::QueryResults results =
- query_processor->ParseSearch(search_spec).ValueOrDie();
+ QueryResults results =
+ query_processor
+ ->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
+ .ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
}
@@ -338,6 +393,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -348,23 +404,35 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ // TODO(b/249829533): switch to using the persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
-
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
- schema_store.get())
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ DocumentStore::CreateResult create_result =
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -382,17 +450,21 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index.get(), language_segmenter.get(),
- normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ QueryProcessor::Create(index.get(), numeric_index.get(),
+ language_segmenter.get(), normalizer.get(),
+ document_store.get(), schema_store.get()));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
for (auto _ : state) {
- QueryProcessor::QueryResults results =
- query_processor->ParseSearch(search_spec).ValueOrDie();
+ QueryResults results =
+ query_processor
+ ->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
+ .ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
}
@@ -453,6 +525,7 @@ void BM_QueryHiragana(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -463,23 +536,35 @@ void BM_QueryHiragana(benchmark::State& state) {
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
+ // TODO(b/249829533): switch to using the persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
-
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
- schema_store.get())
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ DocumentStore::CreateResult create_result =
+ CreateDocumentStore(&filesystem, doc_store_dir, &clock,
+ schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -497,17 +582,21 @@ void BM_QueryHiragana(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index.get(), language_segmenter.get(),
- normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ QueryProcessor::Create(index.get(), numeric_index.get(),
+ language_segmenter.get(), normalizer.get(),
+ document_store.get(), schema_store.get()));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
for (auto _ : state) {
- QueryProcessor::QueryResults results =
- query_processor->ParseSearch(search_spec).ValueOrDie();
+ QueryResults results =
+ query_processor
+ ->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ clock.GetSystemTimeMilliseconds())
+ .ValueOrDie();
while (results.root_iterator->Advance().ok()) {
results.root_iterator->doc_hit_info();
}
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index dc94a72..53e3035 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -14,30 +14,38 @@
#include "icing/query/query-processor.h"
+#include <cstdint>
#include <memory>
#include <string>
+#include <vector>
-#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/query/query-results.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -55,62 +63,67 @@ namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
-using ::testing::Test;
using ::testing::UnorderedElementsAre;
-SchemaTypeConfigProto* AddSchemaType(SchemaProto* schema,
- std::string schema_type) {
- SchemaTypeConfigProto* type_config = schema->add_types();
- type_config->set_schema_type(schema_type);
- return type_config;
-}
-
-void AddIndexedProperty(SchemaTypeConfigProto* type_config, std::string name) {
- PropertyConfigProto* property_config = type_config->add_properties();
- property_config->set_property_name(name);
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
- property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
}
-void AddUnindexedProperty(SchemaTypeConfigProto* type_config,
- std::string name) {
- PropertyConfigProto* property_config = type_config->add_properties();
- property_config->set_property_name(name);
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
-}
-
-class QueryProcessorTest : public Test {
+class QueryProcessorTest
+ : public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
protected:
QueryProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
store_dir_(test_dir_ + "/store"),
- index_dir_(test_dir_ + "/index") {}
+ schema_store_dir_(test_dir_ + "/schema_store"),
+ index_dir_(test_dir_ + "/index"),
+ numeric_index_dir_(test_dir_ + "/numeric_index") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
-#ifndef ICING_REVERSE_JNI_SEGMENTATION
- // If we've specified using the reverse-JNI method for segmentation (i.e.
- // not ICU), then we won't have the ICU data file included to set up.
- // Technically, we could choose to use reverse-JNI for segmentation AND
- // include an ICU data file, but that seems unlikely and our current BUILD
- // setup doesn't do this.
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
-#endif // ICING_REVERSE_JNI_SEGMENTATION
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
- /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_,
- Index::Create(options, &icing_filesystem_));
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+ // TODO(b/249829533): switch to using the persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
@@ -120,6 +133,12 @@ class QueryProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/1000));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ query_processor_,
+ QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ document_store_.get(), schema_store_.get()));
}
libtextclassifier3::Status AddTokenToIndex(
@@ -127,7 +146,18 @@ class QueryProcessorTest : public Test {
TermMatchType::Code term_match_type, const std::string& token) {
Index::Editor editor = index_->Edit(document_id, section_id,
term_match_type, /*namespace_id=*/0);
- return editor.AddHit(token.c_str());
+ auto status = editor.BufferTerm(token.c_str());
+ return status.ok() ? editor.IndexAllBufferedTerms() : status;
+ }
+
+ libtextclassifier3::Status AddToNumericIndex(DocumentId document_id,
+ const std::string& property,
+ SectionId section_id,
+ int64_t value) {
+ std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
+ numeric_index_->Edit(property, document_id, section_id);
+ ICING_RETURN_IF_ERROR(editor->BufferKey(value));
+ return std::move(*editor).IndexAllBufferedKeys();
}
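
A usage sketch for the helper above; the property name and value are hypothetical, and the section ID is assumed to be valid for the document:

ICING_ASSERT_OK(AddToNumericIndex(document_id, /*property=*/"price",
                                  /*section_id=*/0, /*value=*/25));
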
void TearDown() override {
@@ -135,67 +165,70 @@ class QueryProcessorTest : public Test {
schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
-
Filesystem filesystem_;
const std::string test_dir_;
const std::string store_dir_;
+ const std::string schema_store_dir_;
+
+ private:
+ IcingFilesystem icing_filesystem_;
+ const std::string index_dir_;
+ const std::string numeric_index_dir_;
+
+ protected:
std::unique_ptr<Index> index_;
+ std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
- std::unique_ptr<SchemaStore> schema_store_;
- std::unique_ptr<DocumentStore> document_store_;
FakeClock fake_clock_;
std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
-
- private:
- IcingFilesystem icing_filesystem_;
- const std::string index_dir_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<QueryProcessor> query_processor_;
};
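
Because the fixture now derives from ::testing::TestWithParam<SearchSpecProto::SearchType::Code>, the suite needs a matching INSTANTIATE_TEST_SUITE_P call, which this excerpt does not show. A hedged sketch of what such an instantiation could look like; EXPERIMENTAL_ICING_ADVANCED_QUERY is confirmed by the tests below, while ICING_RAW_QUERY is an assumption:

INSTANTIATE_TEST_SUITE_P(
    QueryProcessorTest, QueryProcessorTest,
    testing::Values(
        SearchSpecProto::SearchType::ICING_RAW_QUERY,  // assumed enum value
        SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY));
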
-TEST_F(QueryProcessorTest, CreationWithNullPointerShouldFail) {
+TEST_P(QueryProcessorTest, CreationWithNullPointerShouldFail) {
EXPECT_THAT(
- QueryProcessor::Create(/*index=*/nullptr, language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_),
+ QueryProcessor::Create(/*index=*/nullptr, numeric_index_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ document_store_.get(), schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(
- QueryProcessor::Create(index_.get(), /*language_segmenter=*/nullptr,
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_),
+ QueryProcessor::Create(index_.get(), /*numeric_index=*/nullptr,
+ language_segmenter_.get(), normalizer_.get(),
+ document_store_.get(), schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- /*normalizer=*/nullptr, document_store_.get(),
- schema_store_.get(), &fake_clock_),
+ QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ /*language_segmenter=*/nullptr, normalizer_.get(),
+ document_store_.get(), schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), /*document_store=*/nullptr,
- schema_store_.get(), &fake_clock_),
+ QueryProcessor::Create(
+ index_.get(), numeric_index_.get(), language_segmenter_.get(),
+ /*normalizer=*/nullptr, document_store_.get(), schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- /*schema_store=*/nullptr, &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(QueryProcessor::Create(index_.get(), language_segmenter_.get(),
+ EXPECT_THAT(
+ QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ /*document_store=*/nullptr, schema_store_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), /*clock=*/nullptr),
+ /*schema_store=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
+TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -210,39 +243,41 @@ TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
// We don't need to insert anything in the index since the empty query will
// match all DocumentIds from the DocumentStore
-
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("()");
+ search_spec.set_search_type(GetParam());
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
-
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
- ElementsAre(document_id2, document_id1));
- EXPECT_THAT(results.query_terms, IsEmpty());
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
+ ElementsAre(document_id2, document_id1));
+ EXPECT_THAT(results.query_terms, IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
+ } else {
+ // TODO(b/208654892): Resolve the difference between RAW_QUERY and ADVANCED
+ // regarding empty composite expressions.
+ EXPECT_THAT(query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
-TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
+TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -257,39 +292,32 @@ TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
// We don't need to insert anything in the index since the empty query will
// match all DocumentIds from the DocumentStore
-
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
ElementsAre(document_id2, document_id1));
EXPECT_THAT(results.query_terms, IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
-TEST_F(QueryProcessorTest, QueryTermNormalized) {
+TEST_P(QueryProcessorTest, QueryTermNormalized) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -312,40 +340,45 @@ TEST_F(QueryProcessorTest, QueryTermNormalized) {
AddTokenToIndex(document_id, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("hElLo WORLD");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
-TEST_F(QueryProcessorTest, OneTermPrefixMatch) {
+TEST_P(QueryProcessorTest, OneTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -365,40 +398,101 @@ TEST_F(QueryProcessorTest, OneTermPrefixMatch) {
AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("he");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
-TEST_F(QueryProcessorTest, OneTermExactMatch) {
+TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Populate the index
+ SectionId section_id = kMaxSectionId;
+ SectionIdMask section_id_mask = UINT64_C(1) << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+ std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{};
+ term_frequencies[kMaxSectionId] = 1;
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("he");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
+}
+
+TEST_P(QueryProcessorTest, OneTermExactMatch) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -418,44 +512,105 @@ TEST_F(QueryProcessorTest, OneTermExactMatch) {
AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("hello");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
}
-TEST_F(QueryProcessorTest, AndTwoTermExactMatch) {
+TEST_P(QueryProcessorTest, AndSameTermExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Populate the index
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("hello hello");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+
+ ASSERT_FALSE(results.root_iterator->Advance().ok());
+
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
+}
+
+TEST_P(QueryProcessorTest, AndTwoTermExactMatch) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -474,44 +629,107 @@ TEST_F(QueryProcessorTest, AndTwoTermExactMatch) {
AddTokenToIndex(document_id, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("hello world");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("world", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
-TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) {
+TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Populate the index
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ SearchSpecProto search_spec;
+ search_spec.set_query("he he");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+
+ ASSERT_FALSE(results.root_iterator->Advance().ok());
+
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
+}
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -530,44 +748,50 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) {
AddTokenToIndex(document_id, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("he wo");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("he", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
}
-TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
+TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -586,44 +810,50 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
AddTokenToIndex(document_id, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("hello wo");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("hello", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
}
-TEST_F(QueryProcessorTest, OrTwoTermExactMatch) {
+TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -647,45 +877,58 @@ TEST_F(QueryProcessorTest, OrTwoTermExactMatch) {
AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("hello OR world");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "world", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
-TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) {
+TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -709,45 +952,58 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) {
AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("he OR wo");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "wo", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "he", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
}
-TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
+TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -770,45 +1026,57 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
AddTokenToIndex(document_id2, section_id, TermMatchType::PREFIX, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("hello OR wo");
search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "wo", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ "hello", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
}
-TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
+TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -833,6 +1101,7 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
IsOk());
EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
IsOk());
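+  // Merging flushes the hits added so far into the main index; Document 2's
+  // hits, added after the merge, remain in the lite index, so this test
+  // exercises both indexes.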
+ ICING_ASSERT_OK(index_->Merge());
// Document 2 has content "animal kitten cat"
EXPECT_THAT(
@@ -844,26 +1113,37 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
{
    // OR takes precedence over AND, so this is parsed as ((puppy OR kitten)
    // AND dog)
SearchSpecProto search_spec;
search_spec.set_query("puppy OR kitten dog");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Only Document 1 matches since it has puppy AND dog
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+ document_id1);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("puppy", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("dog", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("puppy", "kitten", "dog"));
@@ -875,57 +1155,102 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
SearchSpecProto search_spec;
search_spec.set_query("animal puppy OR kitten");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- // Both Document 1 and 2 match since Document 1 has puppy AND dog, and
- // Document 2 has kitten
+ // Both Document 1 and 2 match since Document 1 has animal AND puppy, and
+ // Document 2 has animal AND kitten
// Descending order of valid DocumentIds
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+ document_id2);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(
+ EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+ document_id1);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("animal", "puppy", "kitten"));
}
{
- // OR gets precedence over AND, this is parsed as (kitten AND ((foo OR bar)
- // OR cat))
+    // OR takes precedence over AND, so this is parsed as (kitten AND ((foo OR
+    // bar) OR cat))
SearchSpecProto search_spec;
search_spec.set_query("kitten foo OR bar OR cat");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Only Document 2 matches since it has both kitten and cat
- EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask)));
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+ document_id2);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 1}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("kitten", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("cat", expected_section_ids_tf_map)));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(4));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("kitten", "foo", "bar", "cat"));
}
}
-TEST_F(QueryProcessorTest, OneGroup) {
+TEST_P(QueryProcessorTest, OneGroup) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -939,7 +1264,6 @@ TEST_F(QueryProcessorTest, OneGroup) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "puppy dog"
@@ -956,47 +1280,44 @@ TEST_F(QueryProcessorTest, OneGroup) {
EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
// Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
// no documents would match. But with grouping, Document 1 matches puppy
SearchSpecProto search_spec;
search_spec.set_query("puppy OR (kitten foo)");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
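+  // DocHitInfo(document_id) starts with an empty section mask;
+  // UpdateSection(0) sets bit 0, which is equivalent to the old
+  // (1U << section_id) mask.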
+ DocHitInfo expectedDocHitInfo(document_id1);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("puppy", "kitten", "foo"));
}
-TEST_F(QueryProcessorTest, TwoGroups) {
+TEST_P(QueryProcessorTest, TwoGroups) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1010,7 +1331,6 @@ TEST_F(QueryProcessorTest, TwoGroups) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "puppy dog"
@@ -1027,48 +1347,47 @@ TEST_F(QueryProcessorTest, TwoGroups) {
EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
// Without grouping, this would be parsed as (puppy AND (dog OR kitten) AND
// cat) and wouldn't match any documents. But with grouping, Document 1
  // matches (puppy AND dog) and Document 2 matches (kitten AND cat).
SearchSpecProto search_spec;
search_spec.set_query("(puppy dog) OR (kitten cat)");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo1(document_id1);
+ expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
+ DocHitInfo expectedDocHitInfo2(document_id2);
+ expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(4));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("puppy", "dog", "kitten", "cat"));
}
-TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) {
+TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1082,7 +1401,6 @@ TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "puppy dog"
@@ -1099,47 +1417,44 @@ TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) {
EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
// Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
// no documents would match. But with grouping, Document 1 matches puppy
SearchSpecProto search_spec;
search_spec.set_query("puppy OR ((((kitten foo))))");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo(document_id1);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("puppy", "kitten", "foo"));
}
-TEST_F(QueryProcessorTest, OneLevelNestedGrouping) {
+TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1153,7 +1468,6 @@ TEST_F(QueryProcessorTest, OneLevelNestedGrouping) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "puppy dog"
@@ -1170,47 +1484,46 @@ TEST_F(QueryProcessorTest, OneLevelNestedGrouping) {
EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
  // Document 1 matches puppy and Document 2 matches (kitten AND (cat))
SearchSpecProto search_spec;
- search_spec.set_query("puppy OR (kitten(cat))");
+ // TODO(b/208654892) decide how we want to handle queries of the form foo(...)
+ search_spec.set_query("puppy OR (kitten (cat))");
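+  // The space before "(cat)" presumably keeps the parser from reading
+  // "kitten(cat)" as a function-style token; see the TODO above.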
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo1(document_id1);
+ expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
+ DocHitInfo expectedDocHitInfo2(document_id2);
+ expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(3));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""],
UnorderedElementsAre("puppy", "kitten", "cat"));
}
-TEST_F(QueryProcessorTest, ExcludeTerm) {
+TEST_P(QueryProcessorTest, ExcludeTerm) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1233,45 +1546,39 @@ TEST_F(QueryProcessorTest, ExcludeTerm) {
AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("-hello");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- // We don't know have the section mask to indicate what section "world" came.
- // It doesn't matter which section it was in since the query doesn't care. It
- // just wanted documents that didn't have "hello"
+  // We don't have a section mask to indicate which section "world" came
+  // from. It doesn't matter which section it was in since the query doesn't
+  // care. It just wanted documents that didn't have "hello".
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone)));
EXPECT_THAT(results.query_terms, IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
-TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) {
+TEST_P(QueryProcessorTest, ExcludeNonexistentTerm) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1293,44 +1600,38 @@ TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) {
AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("-foo");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone),
DocHitInfo(document_id1, kSectionIdMaskNone)));
EXPECT_THAT(results.query_terms, IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
-TEST_F(QueryProcessorTest, ExcludeAnd) {
+TEST_P(QueryProcessorTest, ExcludeAnd) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1360,25 +1661,24 @@ TEST_F(QueryProcessorTest, ExcludeAnd) {
ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
{
SearchSpecProto search_spec;
search_spec.set_query("-dog -cat");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- // The query is interpreted as "exclude all documents that have animal, and
- // exclude all documents that have cat". Since both documents contain
+    // The query is interpreted as "exclude all documents that have dog, and
+    // exclude all documents that have cat". Since Document 1 contains dog and
+    // Document 2 contains cat, there are no results.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
+
EXPECT_THAT(results.query_terms, IsEmpty());
}
@@ -1386,36 +1686,38 @@ TEST_F(QueryProcessorTest, ExcludeAnd) {
SearchSpecProto search_spec;
search_spec.set_query("-animal cat");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- // The query is interpreted as "exclude all documents that have animal, and
- // include all documents that have cat". Since both documents contain
+ // The query is interpreted as "exclude all documents that have animal,
+ // and include all documents that have cat". Since both documents contain
// animal, there are no results.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
}
}
-TEST_F(QueryProcessorTest, ExcludeOr) {
+TEST_P(QueryProcessorTest, ExcludeOr) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1429,7 +1731,6 @@ TEST_F(QueryProcessorTest, ExcludeOr) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "animal dog"
@@ -1446,26 +1747,25 @@ TEST_F(QueryProcessorTest, ExcludeOr) {
ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
{
SearchSpecProto search_spec;
search_spec.set_query("-animal OR -cat");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// We don't have a section mask indicating which sections in this document
// matched the query since it's not based on section-term matching. It's
// more based on the fact that the query excluded all the other documents.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(DocHitInfo(document_id1, kSectionIdMaskNone)));
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
+
EXPECT_THAT(results.query_terms, IsEmpty());
}
@@ -1473,32 +1773,136 @@ TEST_F(QueryProcessorTest, ExcludeOr) {
SearchSpecProto search_spec;
search_spec.set_query("animal OR -cat");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo1(document_id1);
+ expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
+ DocHitInfo expectedDocHitInfo2(document_id2);
+ expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask),
- DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
}
-TEST_F(QueryProcessorTest, DeletedFilter) {
+TEST_P(QueryProcessorTest, WithoutTermFrequency) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("email")
+ .Build()));
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Populate the index
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
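+  // SectionIdMask is a bitmask with one bit per section id; section 0
+  // corresponds to the lowest bit.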
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Document 1 has content "animal puppy dog", which is added to the main
+ // index.
+ EXPECT_THAT(
+ AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
+ IsOk());
+ EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+ IsOk());
+ ASSERT_THAT(index_->Merge(), IsOk());
+
+ // Document 2 has content "animal kitten cat", which is added to the lite
+ // index.
+ EXPECT_THAT(
+ AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
+ IsOk());
+ EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+ IsOk());
+
+  // OR takes precedence over AND, so this is parsed as (animal AND (puppy OR
+  // kitten))
+ SearchSpecProto search_spec;
+ search_spec.set_query("animal puppy OR kitten");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Descending order of valid DocumentIds
+ // The first Document to match (Document 2) matches on 'animal' AND 'kitten'
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ // Since need_hit_term_frequency is false, the expected term frequency for
+ // the section with the hit should be 0.
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map = {{section_id, 0}};
+ std::vector<TermMatchInfo> matched_terms_stats;
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));
+
+ // The second Document to match (Document 1) matches on 'animal' AND 'puppy'
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+ EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+ section_id_mask);
+
+ matched_terms_stats.clear();
+ results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(
+ matched_terms_stats,
+ ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
+ EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));
+
+ // This should be empty because ranking_strategy != RELEVANCE_SCORE
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
+}
+
+TEST_P(QueryProcessorTest, DeletedFilter) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
  // These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1513,11 +1917,12 @@ TEST_F(QueryProcessorTest, DeletedFilter) {
.SetKey("namespace", "2")
.SetSchema("email")
.Build()));
- EXPECT_THAT(document_store_->Delete("namespace", "1"), IsOk());
+ EXPECT_THAT(document_store_->Delete("namespace", "1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "animal dog"
@@ -1534,40 +1939,37 @@ TEST_F(QueryProcessorTest, DeletedFilter) {
ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo(document_id2);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id2, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, NamespaceFilter) {
+TEST_P(QueryProcessorTest, NamespaceFilter) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
  // These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1585,7 +1987,6 @@ TEST_F(QueryProcessorTest, NamespaceFilter) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "animal dog"
@@ -1602,42 +2003,40 @@ TEST_F(QueryProcessorTest, NamespaceFilter) {
ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("animal");
search_spec.set_term_match_type(term_match_type);
search_spec.add_namespace_filters("namespace1");
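  // Only documents stored under "namespace1" may match; the expectation below
  // is that Document 1 alone survives the filter.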
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo(document_id1);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, SchemaTypeFilter) {
+TEST_P(QueryProcessorTest, SchemaTypeFilter) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
- AddSchemaType(&schema, "message");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
  // These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1655,7 +2054,6 @@ TEST_F(QueryProcessorTest, SchemaTypeFilter) {
// Populate the index
SectionId section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document 1 has content "animal dog"
@@ -1668,45 +2066,45 @@ TEST_F(QueryProcessorTest, SchemaTypeFilter) {
AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
search_spec.set_query("animal");
search_spec.set_term_match_type(term_match_type);
search_spec.add_schema_type_filters("email");
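  // Only documents of the "email" schema type may match; the expectation
  // below is Document 1 alone.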
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo(document_id1);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, SectionFilterForOneDocument) {
+TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// First and only indexed property, so it gets a section_id of 0
- AddIndexedProperty(email_type, "subject");
int subject_section_id = 0;
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
  // These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1718,7 +2116,6 @@ TEST_F(QueryProcessorTest, SectionFilterForOneDocument) {
.Build()));
// Populate the index
- SectionIdMask section_id_mask = 1U << subject_section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Document has content "animal"
@@ -1726,50 +2123,61 @@ TEST_F(QueryProcessorTest, SectionFilterForOneDocument) {
"animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>'
search_spec.set_query("subject:animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Descending order of valid DocumentIds
+ DocHitInfo expectedDocHitInfo(document_id);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms["subject"], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) {
+TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(email_type, "a"); // Section "a" would get sectionId 0
- AddIndexedProperty(email_type, "foo");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Section "a" would get sectionId 0
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
int email_foo_section_id = 1;
-
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
  // These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1786,8 +2194,6 @@ TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) {
.Build()));
// Populate the index
- SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
- SectionIdMask message_section_id_mask = 1U << message_foo_section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Email document has content "animal"
@@ -1800,52 +2206,52 @@ TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) {
term_match_type, "animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>'
search_spec.set_query("foo:animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Ordered by descending DocumentId, so message comes first since it was
// inserted last
- EXPECT_THAT(
- GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(message_document_id, message_section_id_mask),
- DocHitInfo(email_document_id, email_section_id_mask)));
+ DocHitInfo expectedDocHitInfo1(message_document_id);
+ expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
+ DocHitInfo expectedDocHitInfo2(email_document_id);
+ expectedDocHitInfo2.UpdateSection(/*section_id=*/1);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) {
- // Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(email_type, "foo");
+TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
int email_foo_section_id = 0;
-
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1862,7 +2268,6 @@ TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) {
.Build()));
// Populate the index
- SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Email document has content "animal"
@@ -1875,53 +2280,135 @@ TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) {
term_match_type, "animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+ // Create a section filter '<section name>:<query term>', but only look
+ // within documents of email schema
search_spec.set_query("foo:animal");
search_spec.add_schema_type_filters("email");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Shouldn't include the message document since we're only looking at email
// types
- EXPECT_THAT(
- GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(email_document_id, email_section_id_mask)));
+ DocHitInfo expectedDocHitInfo(email_document_id);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) {
+TEST_P(QueryProcessorTest, NestedPropertyFilter) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(email_type, "foo");
- int email_foo_section_id = 0;
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+                   // Add an indexed document property so we generate
+                   // section metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeDocument(
+ "Foo", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Foo")
+                   // Add an indexed document property so we generate
+                   // section metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeDocument(
+ "Bar", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Bar")
+                   // Add an indexed string property so we generate
+                   // section metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(message_type, "bar");
- int message_foo_section_id = 0;
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Populate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, /*section_id=*/0,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+  // Create a nested section filter '<property path>:<query term>'
+ search_spec.set_query("foo.bar.baz:animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+  // The email document should match since the token was indexed under the
+  // nested property path "foo.bar.baz"
+ DocHitInfo expectedDocHitInfo1(email_document_id);
+ expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expectedDocHitInfo1));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo.bar.baz"],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ int email_foo_section_id = 0;
+ int message_foo_section_id = 0;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1938,7 +2425,6 @@ TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) {
.Build()));
// Populate the index
- SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Email document has content "animal"
@@ -1946,51 +2432,47 @@ TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) {
term_match_type, "animal"),
IsOk());
- // Message document has content "animal", but put in in the same section id as
- // the indexed email section id, the same id as indexed property "foo" in the
- // message type
+  // Message document has content "animal", but put it in the same section id
+  // as the indexed email section id, the same id as indexed property "foo" in
+  // the message type
ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
term_match_type, "animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+  // Create a section filter '<section name>:<query term>'
search_spec.set_query("foo:animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match the name of the section filter
- EXPECT_THAT(
- GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(email_document_id, email_section_id_mask)));
+ DocHitInfo expectedDocHitInfo(email_document_id);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expectedDocHitInfo));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) {
+TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2009,44 +2491,46 @@ TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) {
term_match_type, "animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+  // Create a section filter '<section name>:<query term>' for a section that
+  // doesn't exist in the schema
search_spec.set_query("nonexistent:animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match the name of the section filter
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms["nonexistent"],
UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) {
+TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- AddUnindexedProperty(email_type, "foo");
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an unindexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2065,50 +2549,49 @@ TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) {
term_match_type, "animal"),
IsOk());
- // Perform query
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+  // Create a section filter '<section name>:<query term>' on a property that
+  // isn't indexed
search_spec.set_query("foo:animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Even though the section id is the same, we should be able to tell that it
// doesn't match the name of the section filter
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
EXPECT_THAT(results.query_terms, SizeIs(1));
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) {
+TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(email_type, "foo");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
int email_foo_section_id = 0;
-
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
- AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
-
- ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2125,8 +2608,6 @@ TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) {
.Build()));
// Populate the index
- SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
- SectionIdMask message_section_id_mask = 1U << message_foo_section_id;
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
// Email document has content "animal"
@@ -2142,106 +2623,379 @@ TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) {
term_match_type, "animal"),
IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
-
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>'
search_spec.set_query("cat OR foo:animal");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
// Ordered by descending DocumentId, so message comes first since it was
// inserted last
- EXPECT_THAT(
- GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(message_document_id, message_section_id_mask),
- DocHitInfo(email_document_id, email_section_id_mask)));
+ DocHitInfo expectedDocHitInfo1(message_document_id);
+ expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
+ DocHitInfo expectedDocHitInfo2(email_document_id);
+ expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+
EXPECT_THAT(results.query_terms, SizeIs(2));
EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
-TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
+TEST_P(QueryProcessorTest, TypePropertyFilter) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+  // Populate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ // Ordered by descending DocumentId, so message comes first since it was
+ // inserted last
+ DocHitInfo expected_doc_hit_info1(message_document_id);
+ expected_doc_hit_info1.UpdateSection(message_bar_section_id);
+ expected_doc_hit_info1.UpdateSection(message_baz_section_id);
+ DocHitInfo expected_doc_hit_info2(email_document_id);
+ expected_doc_hit_info2.UpdateSection(email_foo_section_id);
+ expected_doc_hit_info2.UpdateSection(email_baz_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, TypePropertyFilterWithSectionRestrict) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema("email")
- .SetCreationTimestampMs(0)
- .SetTtlMs(100)
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
.Build()));
- // Populate the index
- int section_id = 0;
- SectionIdMask section_id_mask = 1U << section_id;
+  // Populate the index
TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
- EXPECT_THAT(
- AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
- IsOk());
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query("foo:animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Only hits in sections allowed by both the property filters and section
+ // restricts should be returned. Message document should not be returned since
+ // section foo specified in the section restrict is not allowed by the
+ // property filters.
+ DocHitInfo expected_doc_hit_info(email_document_id);
+ expected_doc_hit_info.UpdateSection(email_foo_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Arbitrary value, just has to be less than the document's creation
// timestamp + ttl
FakeClock fake_clock;
fake_clock.SetSystemTimeMilliseconds(50);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .SetTtlMs(100)
+ .Build()));
+
+ // Populate the index
+ int section_id = 0;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+
// Perform query
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ std::unique_ptr<QueryProcessor> local_query_processor,
+ QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ document_store_.get(), schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ local_query_processor->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ DocHitInfo expectedDocHitInfo(document_id);
+ expectedDocHitInfo.UpdateSection(/*section_id=*/0);
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
- ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ ElementsAre(expectedDocHitInfo));
}
-TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) {
+TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Arbitrary value, just has to be greater than the document's creation
+ // timestamp + ttl
+ FakeClock fake_clock_local;
+ fake_clock_local.SetSystemTimeMilliseconds(200);
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_local,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("email")
- .SetCreationTimestampMs(0)
- .SetTtlMs(100)
- .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(50)
+ .SetTtlMs(100)
+ .Build()));
// Populate the index
int section_id = 0;
@@ -2251,28 +3005,327 @@ TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) {
AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
IsOk());
- // Arbitrary value, just has to be greater than the document's creation
- // timestamp + ttl
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(200);
-
// Perform query
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QueryProcessor> query_processor,
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock));
+ std::unique_ptr<QueryProcessor> local_query_processor,
+ QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ document_store_.get(), schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello");
search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ local_query_processor->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_local.GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryProcessorTest, NumericFilter) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP() << "Numeric filter is only supported in advanced query.";
+ }
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned alphabetically
+ SectionId cost_section_id = 0;
+ SectionId price_section_id = 1;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_one_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .AddInt64Property("price", 10)
+ .Build()));
+ ICING_ASSERT_OK(
+ AddToNumericIndex(document_one_id, "price", price_section_id, 10));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_two_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .AddInt64Property("price", 25)
+ .Build()));
+ ICING_ASSERT_OK(
+ AddToNumericIndex(document_two_id, "price", price_section_id, 25));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_three_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .AddInt64Property("cost", 2)
+ .Build()));
+ ICING_ASSERT_OK(
+ AddToNumericIndex(document_three_id, "cost", cost_section_id, 2));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_one_id, std::vector<SectionId>{price_section_id})));
+
+ search_spec.set_query("price == 25");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_two_id, std::vector<SectionId>{price_section_id})));
+
+ search_spec.set_query("cost > 2");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_three_id, std::vector<SectionId>{cost_section_id})));
+
+ search_spec.set_query("price <= 25");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(document_two_id,
+ std::vector<SectionId>{price_section_id}),
+ EqualsDocHitInfo(document_one_id,
+ std::vector<SectionId>{price_section_id})));
+}
+
+TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP() << "Numeric filter is only supported in advanced query.";
+ }
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ SectionId price_section_id = 0;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_one_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .AddInt64Property("price", 10)
+ .Build()));
+ ICING_ASSERT_OK(
+ AddToNumericIndex(document_one_id, "price", price_section_id, 10));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(GetParam());
+
+ libtextclassifier3::StatusOr<QueryResults> result_or =
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(result_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(QueryProcessorTest, GroupingInSectionRestriction) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP() << "Grouping in section restriction is only supported in "
+ "advanced query.";
+ }
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Create documents as follows:
+ // Doc0:
+ // prop1: "foo"
+ // prop2: "bar"
+ // Doc1:
+ // prop1: "bar"
+ // prop2: "foo"
+ // Doc2:
+ // prop1: "foo bar"
+ // prop2: ""
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "0")
+ .SetSchema("email")
+ .Build()));
+ EXPECT_THAT(
+ AddTokenToIndex(document_id0, prop1_section_id, term_match_type, "foo"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id0, prop2_section_id, term_match_type, "bar"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ EXPECT_THAT(
+ AddTokenToIndex(document_id1, prop1_section_id, term_match_type, "bar"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id1, prop2_section_id, term_match_type, "foo"),
+ IsOk());
- ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
- query_processor->ParseSearch(search_spec));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("email")
+ .Build()));
+ EXPECT_THAT(
+ AddTokenToIndex(document_id2, prop1_section_id, term_match_type, "foo"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id2, prop1_section_id, term_match_type, "bar"),
+ IsOk());
+
+ // prop1:(foo bar) <=> prop1:foo AND prop1:bar, which matches doc2.
+ SearchSpecProto search_spec;
+ search_spec.set_query("prop1:(foo bar)");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+ search_spec.add_enabled_features(
+ std::string(kListFilterQueryLanguageFeature));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_id2, std::vector<SectionId>{prop1_section_id})));
+ // prop2:(foo bar) <=> prop2:foo AND prop2:bar, which matches nothing.
+ search_spec.set_query("prop2:(foo bar)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+
+ // prop1:(foo -bar) <=> prop1:foo AND -prop1:bar, which matches doc0.
+ search_spec.set_query("prop1:(foo -bar)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(
+ document_id0, std::vector<SectionId>{prop1_section_id})));
+
+ // prop2:(-foo OR bar) <=> -prop2:foo OR prop2:bar, which matches doc0 and
+ // doc2.
+ search_spec.set_query("prop2:(-foo OR bar)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(document_id2, std::vector<SectionId>{}),
+ EqualsDocHitInfo(document_id0,
+ std::vector<SectionId>{prop2_section_id})));
+
+ // prop1:((foo AND bar) OR (foo AND -baz))
+ // <=> ((prop1:foo AND prop1:bar) OR (prop1:foo AND -prop1:baz)), which
+ // matches doc0 and doc2.
+ search_spec.set_query("prop1:((foo AND bar) OR (foo AND -baz))");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ results, query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(EqualsDocHitInfo(document_id2,
+ std::vector<SectionId>{prop1_section_id}),
+ EqualsDocHitInfo(document_id0,
+ std::vector<SectionId>{prop1_section_id})));
}
+INSTANTIATE_TEST_SUITE_P(
+ QueryProcessorTest, QueryProcessorTest,
+ testing::Values(
+ SearchSpecProto::SearchType::ICING_RAW_QUERY,
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY));
+
} // namespace
} // namespace lib
diff --git a/icing/query/query-results.h b/icing/query/query-results.h
new file mode 100644
index 0000000..52cdd71
--- /dev/null
+++ b/icing/query/query-results.h
@@ -0,0 +1,46 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_QUERY_RESULTS_H_
+#define ICING_QUERY_QUERY_RESULTS_H_
+
+#include <memory>
+#include <unordered_set>
+
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/query/query-terms.h"
+#include "icing/query/query-features.h"
+
+namespace icing {
+namespace lib {
+
+struct QueryResults {
+ std::unique_ptr<DocHitInfoIterator> root_iterator;
+ // A map from section names to sets of terms restricted to those sections.
+ // Query terms that are not restricted are found at the entry with key "".
+ SectionRestrictQueryTermsMap query_terms;
+ // Hit iterators for the text terms in the query. These query_term_iterators
+  // are completely separate from the iterators that make up the iterator tree
+ // beginning with root_iterator.
+ // This will only be populated when ranking_strategy == RELEVANCE_SCORE.
+ QueryTermIteratorsMap query_term_iterators;
+ // Features that are invoked during query execution.
+  // The list of possible features is defined in query-features.h.
+ std::unordered_set<Feature> features_in_use;
+};
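+
+// Illustrative example (based on the tests in this change): parsing the raw
+// query "cat OR foo:animal" would produce query_terms == {"" -> {"cat"},
+// "foo" -> {"animal"}}, with query_term_iterators holding one iterator per
+// term when ranking by RELEVANCE_SCORE.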
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_QUERY_RESULTS_H_
diff --git a/icing/query/query-terms.h b/icing/query/query-terms.h
index 1c5ce02..c4efe78 100644
--- a/icing/query/query-terms.h
+++ b/icing/query/query-terms.h
@@ -15,18 +15,24 @@
#ifndef ICING_QUERY_QUERY_TERMS_H_
#define ICING_QUERY_QUERY_TERMS_H_
+#include <memory>
#include <string>
-#include <string_view>
#include <unordered_map>
#include <unordered_set>
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+
namespace icing {
namespace lib {
// A map from section names to sets of terms restricted to those sections.
// Query terms that are not restricted are found at the entry with key "".
using SectionRestrictQueryTermsMap =
- std::unordered_map<std::string_view, std::unordered_set<std::string>>;
+ std::unordered_map<std::string, std::unordered_set<std::string>>;
+
+// A map from query terms to a DocHitInfoIterator for that term.
+using QueryTermIteratorsMap =
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>;
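+
+// e.g. after parsing "foo:animal" with RELEVANCE_SCORE ranking, this map
+// would hold a single entry mapping the term "animal" to an iterator over
+// that term's hits.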
} // namespace lib
} // namespace icing
diff --git a/icing/query/query-utils.cc b/icing/query/query-utils.cc
new file mode 100644
index 0000000..37c3600
--- /dev/null
+++ b/icing/query/query-utils.cc
@@ -0,0 +1,42 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/query-utils.h"
+
+#include <string_view>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorFilter::Options GetFilterOptions(
+ const SearchSpecProto& search_spec) {
+ DocHitInfoIteratorFilter::Options options;
+
+ if (search_spec.namespace_filters_size() > 0) {
+ options.namespaces =
+ std::vector<std::string_view>(search_spec.namespace_filters().begin(),
+ search_spec.namespace_filters().end());
+ }
+
+ if (search_spec.schema_type_filters_size() > 0) {
+ options.schema_types =
+ std::vector<std::string_view>(search_spec.schema_type_filters().begin(),
+ search_spec.schema_type_filters().end());
+ }
+ return options;
+}
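+
+// Illustrative usage (hypothetical caller, for documentation only):
+//
+//   SearchSpecProto search_spec;
+//   search_spec.add_schema_type_filters("email");
+//   DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
+//   // options.schema_types now contains {"email"}.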
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/query-utils.h b/icing/query/query-utils.h
new file mode 100644
index 0000000..d85cf3a
--- /dev/null
+++ b/icing/query/query-utils.h
@@ -0,0 +1,30 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_QUERY_UTILS_H_
+#define ICING_QUERY_QUERY_UTILS_H_
+
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorFilter::Options GetFilterOptions(
+ const SearchSpecProto& search_spec);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_QUERY_UTILS_H_
diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc
new file mode 100644
index 0000000..eb86e3b
--- /dev/null
+++ b/icing/query/suggestion-processor.cc
@@ -0,0 +1,311 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/suggestion-processor.h"
+
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/query/query-processor.h"
+#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker-impl.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>>
+SuggestionProcessor::Create(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store) {
+ ICING_RETURN_ERROR_IF_NULL(index);
+ ICING_RETURN_ERROR_IF_NULL(numeric_index);
+ ICING_RETURN_ERROR_IF_NULL(language_segmenter);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(document_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
+
+ return std::unique_ptr<SuggestionProcessor>(
+ new SuggestionProcessor(index, numeric_index, language_segmenter,
+ normalizer, document_store, schema_store));
+}
+
+libtextclassifier3::StatusOr<
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>>
+PopulateDocumentIdFilters(
+ const DocumentStore* document_store,
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ const std::unordered_set<NamespaceId>& namespace_ids) {
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map;
+ document_id_filter_map.reserve(suggestion_spec.document_uri_filters_size());
+ for (const NamespaceDocumentUriGroup& namespace_document_uri_group :
+ suggestion_spec.document_uri_filters()) {
+ auto namespace_id_or = document_store->GetNamespaceId(
+ namespace_document_uri_group.namespace_());
+ if (!namespace_id_or.ok()) {
+ // The current namespace doesn't exist.
+ continue;
+ }
+ NamespaceId namespace_id = namespace_id_or.ValueOrDie();
+ if (!namespace_ids.empty() &&
+ namespace_ids.find(namespace_id) == namespace_ids.end()) {
+ // The current namespace doesn't appear in the namespace filter.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The namespace : ", namespace_document_uri_group.namespace_(),
+ " appears in the document uri filter, but doesn't appear in the "
+ "namespace filter."));
+ }
+
+ if (namespace_document_uri_group.document_uris().empty()) {
+      // Clients should use the namespace filter to filter out all documents
+      // under a namespace.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The namespace : ", namespace_document_uri_group.namespace_(),
+ " has empty document uri in the document uri filter. Please use the "
+ "namespace filter to exclude a namespace instead of the document uri "
+ "filter."));
+ }
+
+    // Translate namespace document URIs into document ids
+ std::unordered_set<DocumentId> target_document_ids;
+ target_document_ids.reserve(
+ namespace_document_uri_group.document_uris_size());
+ for (std::string_view document_uri :
+ namespace_document_uri_group.document_uris()) {
+ auto document_id_or = document_store->GetDocumentId(
+ namespace_document_uri_group.namespace_(), document_uri);
+ if (!document_id_or.ok()) {
+ continue;
+ }
+ target_document_ids.insert(document_id_or.ValueOrDie());
+ }
+ document_id_filter_map.insert({namespace_id, target_document_ids});
+ }
+ return document_id_filter_map;
+}
+
+libtextclassifier3::StatusOr<std::unordered_map<SchemaTypeId, SectionIdMask>>
+PopulatePropertyFilters(
+ const SchemaStore* schema_store,
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ const std::unordered_set<SchemaTypeId>& schema_type_ids) {
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map;
+ property_filter_map.reserve(suggestion_spec.type_property_filters_size());
+ for (const TypePropertyMask& type_field_mask :
+ suggestion_spec.type_property_filters()) {
+ auto schema_type_id_or =
+ schema_store->GetSchemaTypeId(type_field_mask.schema_type());
+ if (!schema_type_id_or.ok()) {
+ // The current schema doesn't exist
+ continue;
+ }
+ SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
+
+ if (!schema_type_ids.empty() &&
+ schema_type_ids.find(schema_type_id) == schema_type_ids.end()) {
+ // The current schema type doesn't appear in the schema type filter.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The schema : ", type_field_mask.schema_type(),
+ " appears in the property filter, but doesn't appear in the schema"
+ " type filter."));
+ }
+
+ if (type_field_mask.paths().empty()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "The schema type : ", type_field_mask.schema_type(),
+ " has empty path in the property filter. Please use the schema type"
+ " filter to exclude a schema type instead of the property filter."));
+ }
+
+    // Translate property paths into a section id mask.
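+    // For example (illustrative): if the requested path "subject" corresponds
+    // to section id 2 in this schema, the mask gets bit 2 set, i.e.
+    // UINT64_C(1) << 2 == 0b100.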
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list_or =
+ schema_store->GetSectionMetadata(type_field_mask.schema_type());
+ if (!section_metadata_list_or.ok()) {
+      // The current schema type doesn't have section metadata.
+ continue;
+ }
+ std::unordered_set<std::string> target_property_paths;
+ target_property_paths.reserve(type_field_mask.paths_size());
+ for (const std::string& target_property_path : type_field_mask.paths()) {
+ target_property_paths.insert(target_property_path);
+ }
+ const std::vector<SectionMetadata>* section_metadata_list =
+ section_metadata_list_or.ValueOrDie();
+ for (const SectionMetadata& section_metadata : *section_metadata_list) {
+ if (target_property_paths.find(section_metadata.path) !=
+ target_property_paths.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ property_filter_map.insert({schema_type_id, section_mask});
+ }
+ return property_filter_map;
+}
+
+libtextclassifier3::StatusOr<std::vector<TermMetadata>>
+SuggestionProcessor::QuerySuggestions(
+ const icing::lib::SuggestionSpecProto& suggestion_spec,
+ int64_t current_time_ms) {
+  // We use the query tokenizer to tokenize the given prefix, and we only use
+  // the last token as the suggestion prefix.
+
+ // Populate target namespace filter.
+ std::unordered_set<NamespaceId> namespace_ids;
+ namespace_ids.reserve(suggestion_spec.namespace_filters_size());
+ for (std::string_view name_space : suggestion_spec.namespace_filters()) {
+ auto namespace_id_or = document_store_.GetNamespaceId(name_space);
+ if (!namespace_id_or.ok()) {
+ // The current namespace doesn't exist.
+ continue;
+ }
+ namespace_ids.insert(namespace_id_or.ValueOrDie());
+ }
+ if (namespace_ids.empty() && !suggestion_spec.namespace_filters().empty()) {
+    // None of the desired namespaces exist; return early.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target document id filter.
+ auto document_id_filter_map_or = PopulateDocumentIdFilters(
+ &document_store_, suggestion_spec, namespace_ids);
+ if (!document_id_filter_map_or.ok()) {
+ return std::move(document_id_filter_map_or).status();
+ }
+
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map = document_id_filter_map_or.ValueOrDie();
+ if (document_id_filter_map.empty() &&
+ !suggestion_spec.document_uri_filters().empty()) {
+    // None of the desired document ids exist; return early.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target schema type filter.
+ std::unordered_set<SchemaTypeId> schema_type_ids;
+ schema_type_ids.reserve(suggestion_spec.schema_type_filters_size());
+ for (std::string_view schema_type : suggestion_spec.schema_type_filters()) {
+ auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ if (!schema_type_id_or.ok()) {
+ continue;
+ }
+ schema_type_ids.insert(schema_type_id_or.ValueOrDie());
+ }
+ if (schema_type_ids.empty() &&
+ !suggestion_spec.schema_type_filters().empty()) {
+    // None of the desired schema types exist; return early.
+ return std::vector<TermMetadata>();
+ }
+
+ // Populate target properties filter.
+ auto property_filter_map_or =
+ PopulatePropertyFilters(&schema_store_, suggestion_spec, schema_type_ids);
+ if (!property_filter_map_or.ok()) {
+ return std::move(property_filter_map_or).status();
+ }
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map =
+ property_filter_map_or.ValueOrDie();
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<QueryProcessor> query_processor,
+ QueryProcessor::Create(&index_, &numeric_index_, &language_segmenter_,
+ &normalizer_, &document_store_, &schema_store_));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query(suggestion_spec.prefix());
+ search_spec.set_term_match_type(
+ suggestion_spec.scoring_spec().scoring_match_type());
+ ICING_ASSIGN_OR_RETURN(
+ QueryResults query_results,
+ query_processor->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE,
+ current_time_ms));
+
+ ICING_ASSIGN_OR_RETURN(
+ DocHitInfoIterator::TrimmedNode trimmed_node,
+ std::move(*query_results.root_iterator).TrimRightMostNode());
+
+  // If the last token does not end at the end of the prefix, there are
+  // operator tokens after it that the tokenizer ignored.
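+  // For example (illustrative): for the prefix "f ", the last TEXT token "f"
+  // ends before the trailing space, so it is not the last token and no
+  // suggestions are generated.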
+ bool is_last_token =
+ trimmed_node.term_start_index_ + trimmed_node.unnormalized_term_length_ >=
+ suggestion_spec.prefix().length();
+
+ if (!is_last_token || trimmed_node.term_.empty()) {
+    // We don't have a valid last token; return early.
+ return std::vector<TermMetadata>();
+ }
+
+  // Populate the search base of document ids.
+  // Suggestions are only generated for the very last term;
+  // trimmed_node.iterator_ tracks search results for all previous terms. If it
+  // is null, there is no previous term and we are generating a suggestion for
+  // a single term.
+ std::unordered_set<DocumentId> search_base;
+ if (trimmed_node.iterator_ != nullptr) {
+ while (trimmed_node.iterator_->Advance().ok()) {
+ search_base.insert(trimmed_node.iterator_->doc_hit_info().document_id());
+ }
+ if (search_base.empty()) {
+      // Nothing matches the previous terms in the query. There are no valid
+      // suggestions to make, so return early.
+ return std::vector<TermMetadata>();
+ }
+ }
+
+ // Create result checker based on given filters.
+ SuggestionResultCheckerImpl suggestion_result_checker_impl(
+ &document_store_, &schema_store_, std::move(namespace_ids),
+ std::move(document_id_filter_map), std::move(schema_type_ids),
+ std::move(property_filter_map), std::move(trimmed_node.target_section_),
+ std::move(search_base), current_time_ms);
+  // TODO(b/228240987): Support generating suggestions and appending the suffix
+  // for advanced queries and function calls.
+ std::string query_prefix =
+ suggestion_spec.prefix().substr(0, trimmed_node.term_start_index_);
+ // Run suggestion based on given SuggestionSpec.
+ // Normalize token text to lowercase since all tokens in the lexicon are
+ // lowercase.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<TermMetadata> terms,
+ index_.FindTermsByPrefix(
+ trimmed_node.term_, suggestion_spec.num_to_return(),
+ suggestion_spec.scoring_spec().scoring_match_type(),
+ suggestion_spec.scoring_spec().rank_by(),
+ &suggestion_result_checker_impl));
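+  // Re-attach everything that preceded the last term. For example
+  // (illustrative): for the prefix "bar f" and the expanded term "foo", the
+  // returned suggestion is "bar foo".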
+ for (TermMetadata& term : terms) {
+ term.content = query_prefix + term.content;
+ }
+ return terms;
+}
+
+SuggestionProcessor::SuggestionProcessor(
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ const DocumentStore* document_store, const SchemaStore* schema_store)
+ : index_(*index),
+ numeric_index_(*numeric_index),
+ language_segmenter_(*language_segmenter),
+ normalizer_(*normalizer),
+ document_store_(*document_store),
+ schema_store_(*schema_store) {}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h
new file mode 100644
index 0000000..e100031
--- /dev/null
+++ b/icing/query/suggestion-processor.h
@@ -0,0 +1,78 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_SUGGESTION_PROCESSOR_H_
+#define ICING_QUERY_SUGGESTION_PROCESSOR_H_
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+// Processes SuggestionSpecProtos and retrieves the TermMetadata that satisfy
+// the prefix and its restrictions. This also performs ranking and returns
+// TermMetadata ordered by hit count.
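+//
+// Example usage (an illustrative sketch; error handling omitted; the pointer
+// arguments are assumed to be valid, live components):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<SuggestionProcessor> processor,
+//       SuggestionProcessor::Create(index, numeric_index, language_segmenter,
+//                                   normalizer, document_store, schema_store));
+//   SuggestionSpecProto spec;
+//   spec.set_prefix("fo");
+//   spec.set_num_to_return(10);
+//   spec.mutable_scoring_spec()->set_scoring_match_type(TermMatchType::PREFIX);
+//   ICING_ASSIGN_OR_RETURN(
+//       std::vector<TermMetadata> terms,
+//       processor->QuerySuggestions(spec, current_time_ms));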
+class SuggestionProcessor {
+ public:
+  // Factory function to create a SuggestionProcessor. It does not take
+  // ownership of any input components; all pointers must refer to valid
+  // objects that outlive the created SuggestionProcessor instance.
+ //
+ // Returns:
+  //   A SuggestionProcessor on success
+ // FAILED_PRECONDITION if any of the pointers is null.
+ static libtextclassifier3::StatusOr<std::unique_ptr<SuggestionProcessor>>
+ Create(Index* index, const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer, const DocumentStore* document_store,
+ const SchemaStore* schema_store);
+
+ // Query suggestions based on the given SuggestionSpecProto.
+ //
+ // Returns:
+  //   On success,
+  //     - A vector of the TermMetadata that match the prefix and restrictions
+ // INTERNAL_ERROR on all other errors
+ libtextclassifier3::StatusOr<std::vector<TermMetadata>> QuerySuggestions(
+ const SuggestionSpecProto& suggestion_spec, int64_t current_time_ms);
+
+ private:
+ explicit SuggestionProcessor(Index* index,
+ const NumericIndex<int64_t>* numeric_index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store);
+
+  // Not const because we may modify/sort the TermMetadata buffer in the lite
+  // index.
+ Index& index_;
+ const NumericIndex<int64_t>& numeric_index_;
+ const LanguageSegmenter& language_segmenter_;
+ const Normalizer& normalizer_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_SUGGESTION_PROCESSOR_H_
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
new file mode 100644
index 0000000..9f9094d
--- /dev/null
+++ b/icing/query/suggestion-processor_test.cc
@@ -0,0 +1,722 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/suggestion-processor.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/term-metadata.h"
+#include "icing/schema-builder.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::Test;
+using ::testing::UnorderedElementsAre;
+
+std::vector<std::string> RetrieveSuggestionsText(
+ const std::vector<TermMetadata>& terms) {
+ std::vector<std::string> suggestions;
+ suggestions.reserve(terms.size());
+ for (const TermMetadata& term : terms) {
+ suggestions.push_back(term.content);
+ }
+ return suggestions;
+}
+
+class SuggestionProcessorTest : public Test {
+ protected:
+ SuggestionProcessorTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ store_dir_(test_dir_ + "/store"),
+ schema_store_dir_(test_dir_ + "/schema_store"),
+ index_dir_(test_dir_ + "/index"),
+ numeric_index_dir_(test_dir_ + "/numeric_index") {}
+
+ void SetUp() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+      // If we've chosen a non-ICU segmentation method (e.g. reverse-JNI), we
+      // won't have an ICU data file included to set up. Technically, we could
+      // use reverse-JNI for segmentation AND include an ICU data file, but
+      // that seems unlikely and our current BUILD setup doesn't do this.
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024,
+ /*lite_index_sort_at_indexing=*/true,
+ /*lite_index_sort_size=*/1024 * 8);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+ // TODO(b/249829533): switch to use persistent numeric index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_US, jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(segmenter_options));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ suggestion_processor_,
+ SuggestionProcessor::Create(
+ index_.get(), numeric_index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(), schema_store_.get()));
+ }
+
+ libtextclassifier3::Status AddTokenToIndex(
+ DocumentId document_id, SectionId section_id,
+ TermMatchType::Code term_match_type, const std::string& token) {
+ Index::Editor editor = index_->Edit(document_id, section_id,
+ term_match_type, /*namespace_id=*/0);
+ auto status = editor.BufferTerm(token.c_str());
+ return status.ok() ? editor.IndexAllBufferedTerms() : status;
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ const std::string test_dir_;
+ const std::string store_dir_;
+ const std::string schema_store_dir_;
+
+ private:
+ IcingFilesystem icing_filesystem_;
+ const std::string index_dir_;
+ const std::string numeric_index_dir_;
+
+ protected:
+ std::unique_ptr<Index> index_;
+ std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ FakeClock fake_clock_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
+ std::unique_ptr<SuggestionProcessor> suggestion_processor_;
+};
+
+constexpr SectionId kSectionId2 = 2;
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("bar foo"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar cat foo"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+
+ // Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat
+ // foo" could match.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("bar OR cat f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar OR cat fo", "bar OR cat foo"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "3")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "cat"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
+ TermMatchType::EXACT_ONLY, "lot"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ // Search for "((bar OR cat) OR lot) AND f"
+ suggestion_spec.set_prefix("bar OR cat OR lot f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ // "fo" in document1, "foo" in document2 and "fool" in document3 could match.
+ EXPECT_THAT(
+ RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar OR cat OR lot fo", "bar OR cat OR lot foo",
+ "bar OR cat OR lot fool"));
+}
+
+TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "2")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "fool"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
+ TermMatchType::EXACT_ONLY, "bar"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ // Search for "bar AND FO"
+ suggestion_spec.set_prefix("bar FO");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ // The term is normalized.
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar foo", "bar fool"));
+
+ // Search for "bar AND ḞÖ"
+ suggestion_spec.set_prefix("bar ḞÖ");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ // The term is normalized.
+ EXPECT_THAT(RetrieveSuggestionsText(terms),
+ UnorderedElementsAre("bar foo", "bar fool"));
+}
+
+TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("nonExistTerm");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f ");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("F");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
+
+ suggestion_spec.set_prefix("fO");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
+
+ suggestion_spec.set_prefix("Fo");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
+
+ suggestion_spec.set_prefix("FO");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
+}
+
+TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("{f}");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("[f]");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("(f)");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f:");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ auto terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
+
+  // TODO(b/208654892): Update the handling of hyphens so that a hyphen is only
+  // treated as part of a TEXT token (rather than as a MINUS token) when it is
+  // surrounded on both sides by TEXT, rather than just preceded by TEXT.
+ suggestion_spec.set_prefix("f-");
+ terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("f OR");
+ terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
+}
+
+TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+  // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "original"),
+ IsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("OR OR - :");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+
+ auto terms_or = suggestion_processor_->QuerySuggestions(
+ suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
+ if (SearchSpecProto::default_instance().search_type() ==
+ SearchSpecProto::SearchType::ICING_RAW_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
+ EXPECT_THAT(terms, IsEmpty());
+ } else {
+ EXPECT_THAT(terms_or,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/page-result-state.h b/icing/result/page-result-state.h
index a26c44e..5932b56 100644
--- a/icing/result/page-result-state.h
+++ b/icing/result/page-result-state.h
@@ -18,6 +18,7 @@
#include <cstdint>
#include <vector>
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/scoring/scored-document-hit.h"
@@ -29,11 +30,14 @@ struct PageResultState {
PageResultState(std::vector<ScoredDocumentHit> scored_document_hits_in,
uint64_t next_page_token_in,
SnippetContext snippet_context_in,
- int num_previously_returned_in)
+ std::unordered_map<std::string, ProjectionTree> tree_map,
+ int num_previously_returned_in, int num_per_page_in)
: scored_document_hits(std::move(scored_document_hits_in)),
next_page_token(next_page_token_in),
snippet_context(std::move(snippet_context_in)),
- num_previously_returned(num_previously_returned_in) {}
+ projection_tree_map(std::move(tree_map)),
+ num_previously_returned(num_previously_returned_in),
+ requested_page_size(num_per_page_in) {}
// Results of one page
std::vector<ScoredDocumentHit> scored_document_hits;
@@ -44,8 +48,15 @@ struct PageResultState {
// Information needed for snippeting.
SnippetContext snippet_context;
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map;
+
// Number of results that have been returned in previous pages.
int num_previously_returned;
+
+ // The page size for this query. This should always be >=
+  // scored_document_hits.size().
+ int requested_page_size;
};
} // namespace lib
diff --git a/icing/result/page-result.h b/icing/result/page-result.h
new file mode 100644
index 0000000..6645593
--- /dev/null
+++ b/icing/result/page-result.h
@@ -0,0 +1,46 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_PAGE_RESULT_H_
+#define ICING_RESULT_PAGE_RESULT_H_
+
+#include <vector>
+
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Contains information of the search result of one page.
+struct PageResult {
+ PageResult(std::vector<SearchResultProto::ResultProto> results_in,
+ int num_results_with_snippets_in, int requested_page_size_in)
+ : results(std::move(results_in)),
+ num_results_with_snippets(num_results_with_snippets_in),
+ requested_page_size(requested_page_size_in) {}
+
+ // Results of one page
+ std::vector<SearchResultProto::ResultProto> results;
+
+ // Number of results with snippets.
+ int num_results_with_snippets;
+
+ // The page size for this query. This should always be >= results.size().
+ int requested_page_size;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_PAGE_RESULT_H_
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
new file mode 100644
index 0000000..9896491
--- /dev/null
+++ b/icing/result/projection-tree.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projection-tree.h"
+
+#include <algorithm>
+
+#include "icing/proto/search.pb.h"
+#include "icing/schema/property-util.h"
+
+namespace icing {
+namespace lib {
+
+ProjectionTree::ProjectionTree(
+ const SchemaStore::ExpandedTypePropertyMask& type_field_mask) {
+ for (const std::string& field_mask : type_field_mask.paths) {
+ Node* current_node = &root_;
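+    // Walk down the tree one path component at a time; e.g. (illustrative)
+    // "sender.name" visits or creates the nodes "sender" and then "name".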
+ for (std::string_view sub_field_mask :
+ property_util::SplitPropertyPathExpr(field_mask)) {
+ current_node = AddChildNode(sub_field_mask, &current_node->children);
+ }
+ }
+}
+
+ProjectionTree::Node* ProjectionTree::AddChildNode(
+ std::string_view property_name, std::vector<Node>* current_children) {
+ auto itr = std::find_if(current_children->begin(), current_children->end(),
+ [&property_name](const Node& node) {
+ return node.name == property_name;
+ });
+ if (itr != current_children->end()) {
+ return &(*itr);
+ }
+ current_children->push_back(ProjectionTree::Node(std::string(property_name)));
+ return &current_children->back();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
new file mode 100644
index 0000000..cdf268a
--- /dev/null
+++ b/icing/result/projection-tree.h
@@ -0,0 +1,61 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_PROJECTION_TREE_H_
+#define ICING_RESULT_PROJECTION_TREE_H_
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
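+// A tree built from a set of property path expressions, where paths sharing a
+// prefix share nodes. For example (illustrative), the paths {"sender.name",
+// "sender.emailAddress"} produce a root with a single child "sender", which in
+// turn has the children "name" and "emailAddress".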
+class ProjectionTree {
+ public:
+ struct Node {
+ explicit Node(std::string name = "") : name(std::move(name)) {}
+
+ std::string name;
+ std::vector<Node> children;
+
+ bool operator==(const Node& other) const {
+ return name == other.name && children == other.children;
+ }
+ };
+
+ explicit ProjectionTree(
+ const SchemaStore::ExpandedTypePropertyMask& type_field_mask);
+
+ const Node& root() const { return root_; }
+
+ bool operator==(const ProjectionTree& other) const {
+ return root_ == other.root_;
+ }
+
+ private:
+  // Adds a child node with property_name to current_children and returns a
+  // pointer to the new child node, or a pointer to the existing child if one
+  // with the same name is already present.
+ Node* AddChildNode(std::string_view property_name,
+ std::vector<Node>* current_children);
+
+ Node root_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_PROJECTION_TREE_H_
diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc
new file mode 100644
index 0000000..46d0c12
--- /dev/null
+++ b/icing/result/projection-tree_test.cc
@@ -0,0 +1,118 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projection-tree.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+TEST(ProjectionTreeTest, CreateEmptyFieldMasks) {
+ ProjectionTree tree({});
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ EXPECT_THAT(tree.root().children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeTopLevel) {
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{"", {"subject"}};
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(1));
+ EXPECT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ EXPECT_THAT(tree.root().children.at(0).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) {
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{"",
+ {"subject", "body"}};
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(2));
+
+ const ProjectionTree::Node* child0 = &tree.root().children.at(0);
+ const ProjectionTree::Node* child1 = &tree.root().children.at(1);
+ if (child0->name != "subject") {
+ std::swap(child0, child1);
+ }
+
+ EXPECT_THAT(child0->name, Eq("subject"));
+ EXPECT_THAT(child0->children, IsEmpty());
+ EXPECT_THAT(child1->name, Eq("body"));
+ EXPECT_THAT(child1->children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeNested) {
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{
+ "", {"subject.body", "body"}};
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(2));
+
+ const ProjectionTree::Node* child0 = &tree.root().children.at(0);
+ const ProjectionTree::Node* child1 = &tree.root().children.at(1);
+ if (child0->name != "subject.body") {
+ std::swap(child0, child1);
+ }
+
+ EXPECT_THAT(child0->name, Eq("subject"));
+ ASSERT_THAT(child0->children, SizeIs(1));
+ EXPECT_THAT(child0->children.at(0).name, Eq("body"));
+ EXPECT_THAT(child0->children.at(0).children, IsEmpty());
+ EXPECT_THAT(child1->name, Eq("body"));
+ EXPECT_THAT(child1->children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) {
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{
+ "", {"sender.name.first", "sender.emailAddress"}};
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(1));
+ EXPECT_THAT(tree.root().children.at(0).name, Eq("sender"));
+ ASSERT_THAT(tree.root().children.at(0).children, SizeIs(2));
+
+ const ProjectionTree::Node* child0_child0 =
+ &tree.root().children.at(0).children.at(0);
+ const ProjectionTree::Node* child0_child1 =
+ &tree.root().children.at(0).children.at(1);
+ if (child0_child0->name != "name") {
+ std::swap(child0_child0, child0_child1);
+ }
+
+ EXPECT_THAT(child0_child0->name, Eq("name"));
+ ASSERT_THAT(child0_child0->children, SizeIs(1));
+ EXPECT_THAT(child0_child0->children.at(0).name, Eq("first"));
+ EXPECT_THAT(child0_child0->children.at(0).children, IsEmpty());
+ EXPECT_THAT(child0_child1->name, Eq("emailAddress"));
+ EXPECT_THAT(child0_child1->children, IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/projector.cc b/icing/result/projector.cc
new file mode 100644
index 0000000..26478d2
--- /dev/null
+++ b/icing/result/projector.cc
@@ -0,0 +1,62 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projector.h"
+
+#include <algorithm>
+
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace projector {
+
+void Project(const std::vector<ProjectionTree::Node>& projection_tree,
+ DocumentProto* document) {
+ int num_kept = 0;
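+  // Compact the kept properties to the front of the repeated field as we scan,
+  // then delete the unkept tail in one pass at the end.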
+ for (int cur_pos = 0; cur_pos < document->properties_size(); ++cur_pos) {
+ PropertyProto* prop = document->mutable_properties(cur_pos);
+ auto itr = std::find_if(projection_tree.begin(), projection_tree.end(),
+ [&prop](const ProjectionTree::Node& node) {
+ return node.name == prop->name();
+ });
+ if (itr == projection_tree.end()) {
+ // Property is not present in the projection tree. Just skip it.
+ continue;
+ }
+ // This property should be kept.
+ document->mutable_properties()->SwapElements(num_kept, cur_pos);
+ ++num_kept;
+ if (itr->children.empty()) {
+      // A field mask refers to this property but has no children, so we keep
+      // the entire property with all of its subproperties/values.
+ continue;
+ }
+ // The field mask refers to children of this property. Recurse through the
+ // document values that this property holds and project the children
+ // requested by this field mask.
+ for (DocumentProto& subproperty : *(prop->mutable_document_values())) {
+ Project(itr->children, &subproperty);
+ }
+ }
+ document->mutable_properties()->DeleteSubrange(
+ num_kept, document->properties_size() - num_kept);
+}
+
+} // namespace projector
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/projector.h b/icing/result/projector.h
new file mode 100644
index 0000000..43d9052
--- /dev/null
+++ b/icing/result/projector.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_PROJECTOR_H_
+#define ICING_RESULT_PROJECTOR_H_
+
+#include <vector>
+
+#include "icing/proto/document.pb.h"
+#include "icing/result/projection-tree.h"
+
+namespace icing {
+namespace lib {
+
+namespace projector {
+
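+// Prunes `document` in place, keeping only the properties referenced by the
+// given projection tree nodes (typically the children of a ProjectionTree
+// root). For example (illustrative): projecting with the tree built from
+// {"sender.name"} keeps only the "sender" property and, within each of its
+// document values, only the "name" subproperty.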
+void Project(const std::vector<ProjectionTree::Node>& projection_tree,
+ DocumentProto* document);
+
+} // namespace projector
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_PROJECTOR_H_
diff --git a/icing/result/result-adjustment-info.cc b/icing/result/result-adjustment-info.cc
new file mode 100644
index 0000000..00ac379
--- /dev/null
+++ b/icing/result/result-adjustment-info.cc
@@ -0,0 +1,64 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-adjustment-info.h"
+
+#include <string>
+#include <unordered_map>
+
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+SnippetContext CreateSnippetContext(const SearchSpecProto& search_spec,
+ const ResultSpecProto& result_spec,
+ SectionRestrictQueryTermsMap query_terms) {
+ if (result_spec.snippet_spec().num_to_snippet() > 0 &&
+ result_spec.snippet_spec().num_matches_per_property() > 0) {
+ // Needs snippeting
+ return SnippetContext(std::move(query_terms), result_spec.snippet_spec(),
+ search_spec.term_match_type());
+ }
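+  // Snippeting is disabled; return an empty context.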
+ return SnippetContext(/*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::UNKNOWN);
+}
+
+} // namespace
+
+ResultAdjustmentInfo::ResultAdjustmentInfo(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec, const SchemaStore* schema_store,
+ SectionRestrictQueryTermsMap query_terms)
+ : snippet_context(CreateSnippetContext(search_spec, result_spec,
+ std::move(query_terms))),
+ remaining_num_to_snippet(snippet_context.snippet_spec.num_to_snippet()) {
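+  // Build one ProjectionTree per schema type named by the expanded type
+  // property masks.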
+ for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
+ schema_store->ExpandTypePropertyMasks(
+ result_spec.type_property_masks())) {
+ projection_tree_map.insert(
+ {type_field_mask.schema_type, ProjectionTree(type_field_mask)});
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-adjustment-info.h b/icing/result/result-adjustment-info.h
new file mode 100644
index 0000000..e859492
--- /dev/null
+++ b/icing/result/result-adjustment-info.h
@@ -0,0 +1,53 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_
+#define ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_
+
+#include <string>
+#include <unordered_map>
+
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
+// A wrapper struct for the information used to adjust results during
+// retrieval:
+// - Snippeting
+// - Projection
+struct ResultAdjustmentInfo {
+ // Information needed for snippeting.
+ SnippetContext snippet_context;
+
+ // Remaining # of docs to snippet.
+ int remaining_num_to_snippet;
+
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map;
+
+ explicit ResultAdjustmentInfo(const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const SchemaStore* schema_store,
+ SectionRestrictQueryTermsMap query_terms);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_
diff --git a/icing/result/result-adjustment-info_test.cc b/icing/result/result-adjustment-info_test.cc
new file mode 100644
index 0000000..cbce557
--- /dev/null
+++ b/icing/result/result-adjustment-info_test.cc
@@ -0,0 +1,198 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-adjustment-info.h"
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/snippet-context.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::AnyOf;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+class ResultAdjustmentInfoTest : public testing::Test {
+ protected:
+ ResultAdjustmentInfoTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("Phone"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ FakeClock fake_clock_;
+};
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(
+ int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(result_group_type);
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+TEST_F(ResultAdjustmentInfoTest,
+ ShouldConstructSnippetContextAccordingToSpecs) {
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
+
+ SectionRestrictQueryTermsMap query_terms_map;
+ query_terms_map.emplace("term1", std::unordered_set<std::string>());
+
+ ResultAdjustmentInfo result_adjustment_info(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(), query_terms_map);
+  const SnippetContext& snippet_context =
+      result_adjustment_info.snippet_context;
+
+  // Snippet context should be derived from the specs above.
+  EXPECT_TRUE(snippet_context.query_terms.find("term1") !=
+              snippet_context.query_terms.end());
+  EXPECT_THAT(snippet_context.snippet_spec,
+              EqualsProto(result_spec.snippet_spec()));
+  EXPECT_THAT(snippet_context.match_type, Eq(TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(5));
+}
+
+TEST_F(ResultAdjustmentInfoTest,
+       NoSnippetingShouldReturnEmptySnippetContext) {
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ // Setting num_to_snippet to 0 so that snippeting info won't be
+ // stored.
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5);
+
+ SectionRestrictQueryTermsMap query_terms_map;
+ query_terms_map.emplace("term1", std::unordered_set<std::string>());
+
+ ResultAdjustmentInfo result_adjustment_info(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(), query_terms_map);
+
+ EXPECT_THAT(result_adjustment_info.snippet_context.query_terms, IsEmpty());
+ EXPECT_THAT(
+ result_adjustment_info.snippet_context.snippet_spec,
+ EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance()));
+  EXPECT_THAT(result_adjustment_info.snippet_context.match_type,
+              Eq(TermMatchType::UNKNOWN));
+ EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(0));
+}
+
+TEST_F(ResultAdjustmentInfoTest,
+ ShouldConstructProjectionTreeMapAccordingToSpecs) {
+ // Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ email_type_property_mask->add_paths("sender.emailAddress");
+ TypePropertyMask* phone_type_property_mask =
+ result_spec.add_type_property_masks();
+ phone_type_property_mask->set_schema_type("Phone");
+ phone_type_property_mask->add_paths("caller");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("wild.card");
+
+ ResultAdjustmentInfo result_adjustment_info(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(),
+ /*query_terms=*/{});
+
+ ProjectionTree email_projection_tree =
+ ProjectionTree({"Email", {"sender.name", "sender.emailAddress"}});
+ ProjectionTree alternative_email_projection_tree =
+ ProjectionTree({"Email", {"sender.emailAddress", "sender.name"}});
+ ProjectionTree phone_projection_tree = ProjectionTree({"Phone", {"caller"}});
+ ProjectionTree wildcard_projection_tree = ProjectionTree(
+ {std::string(SchemaStore::kSchemaTypeWildcard), {"wild.card"}});
+
+ EXPECT_THAT(result_adjustment_info.projection_tree_map,
+ UnorderedElementsAre(
+ Pair("Email", AnyOf(email_projection_tree,
+ alternative_email_projection_tree)),
+ Pair("Phone", phone_projection_tree),
+ Pair(std::string(SchemaStore::kSchemaTypeWildcard),
+ wildcard_projection_tree)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc
new file mode 100644
index 0000000..44fa602
--- /dev/null
+++ b/icing/result/result-retriever-v2.cc
@@ -0,0 +1,268 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-retriever-v2.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/projector.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/result/snippet-context.h"
+#include "icing/result/snippet-retriever.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+void ApplyProjection(const ResultAdjustmentInfo* adjustment_info,
+ DocumentProto* document) {
+ if (adjustment_info == nullptr) {
+ return;
+ }
+
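+  // Look up the projection tree registered for this document's schema type
+  // first; if there is none, fall back to the wildcard projection tree (if
+  // any) below.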
+ auto itr = adjustment_info->projection_tree_map.find(document->schema());
+ if (itr != adjustment_info->projection_tree_map.end()) {
+ projector::Project(itr->second.root().children, document);
+ } else {
+ auto wildcard_projection_tree_itr =
+ adjustment_info->projection_tree_map.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ if (wildcard_projection_tree_itr !=
+ adjustment_info->projection_tree_map.end()) {
+ projector::Project(wildcard_projection_tree_itr->second.root().children,
+ document);
+ }
+ }
+}
+
+bool ApplySnippet(ResultAdjustmentInfo* adjustment_info,
+ const SnippetRetriever& snippet_retriever,
+ const DocumentProto& document, SectionIdMask section_id_mask,
+ SearchResultProto::ResultProto* result) {
+ if (adjustment_info == nullptr) {
+ return false;
+ }
+
+ const SnippetContext& snippet_context = adjustment_info->snippet_context;
+ int& remaining_num_to_snippet = adjustment_info->remaining_num_to_snippet;
+
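+  // Only snippet if the spec requests per-property matches and this query
+  // still has snippet budget remaining.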
+ if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
+ remaining_num_to_snippet > 0) {
+ SnippetProto snippet_proto = snippet_retriever.RetrieveSnippet(
+ snippet_context.query_terms, snippet_context.match_type,
+ snippet_context.snippet_spec, document, section_id_mask);
+ *result->mutable_snippet() = std::move(snippet_proto);
+ --remaining_num_to_snippet;
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace
+
+bool GroupResultLimiterV2::ShouldBeRemoved(
+ const ScoredDocumentHit& scored_document_hit,
+ const std::unordered_map<int32_t, int>& entry_id_group_id_map,
+ const DocumentStore& document_store, std::vector<int>& group_result_limits,
+ ResultSpecProto::ResultGroupingType result_group_type,
+ int64_t current_time_ms) const {
+ auto document_filter_data_optional =
+ document_store.GetAliveDocumentFilterData(
+ scored_document_hit.document_id(), current_time_ms);
+ if (!document_filter_data_optional) {
+ // The document doesn't exist.
+ return true;
+ }
+ NamespaceId namespace_id =
+ document_filter_data_optional.value().namespace_id();
+ SchemaTypeId schema_type_id =
+ document_filter_data_optional.value().schema_type_id();
+ auto entry_id_or = document_store.GetResultGroupingEntryId(
+ result_group_type, namespace_id, schema_type_id);
+ if (!entry_id_or.ok()) {
+ return false;
+ }
+ int32_t entry_id = entry_id_or.ValueOrDie();
+ auto iter = entry_id_group_id_map.find(entry_id);
+ if (iter == entry_id_group_id_map.end()) {
+ // If a ResultGrouping Entry Id isn't found in entry_id_group_id_map, then
+ // there are no limits placed on results from this entry id.
+ return false;
+ }
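+  // The entry belongs to a limited group: consume one unit of the group's
+  // remaining budget, removing the hit once the budget is exhausted.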
+ int& count = group_result_limits.at(iter->second);
+ if (count <= 0) {
+ return true;
+ }
+ --count;
+ return false;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<ResultRetrieverV2>>
+ResultRetrieverV2::Create(
+ const DocumentStore* doc_store, const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ std::unique_ptr<const GroupResultLimiterV2> group_result_limiter) {
+ ICING_RETURN_ERROR_IF_NULL(doc_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
+ ICING_RETURN_ERROR_IF_NULL(language_segmenter);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(group_result_limiter);
+
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<SnippetRetriever> snippet_retriever,
+ SnippetRetriever::Create(schema_store, language_segmenter, normalizer));
+
+ return std::unique_ptr<ResultRetrieverV2>(
+ new ResultRetrieverV2(doc_store, std::move(snippet_retriever),
+ std::move(group_result_limiter)));
+}
+
+std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
+ ResultStateV2& result_state, int64_t current_time_ms) const {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+  // Record the ranker size before retrieval so that the number of hits
+  // consumed by this page can be subtracted from the total afterwards.
+ int original_scored_document_hits_ranker_size =
+ result_state.scored_document_hits_ranker->size();
+ int num_results_with_snippets = 0;
+
+  // Retrieve results for the current page.
+ std::vector<SearchResultProto::ResultProto> results;
+ int32_t num_total_bytes = 0;
+ while (results.size() < result_state.num_per_page() &&
+ !result_state.scored_document_hits_ranker->empty()) {
+ JoinedScoredDocumentHit next_best_document_hit =
+ result_state.scored_document_hits_ranker->PopNext();
+ if (group_result_limiter_->ShouldBeRemoved(
+ next_best_document_hit.parent_scored_document_hit(),
+ result_state.entry_id_group_id_map(), doc_store_,
+ result_state.group_result_limits, result_state.result_group_type(),
+ current_time_ms)) {
+ continue;
+ }
+
+ libtextclassifier3::StatusOr<DocumentProto> document_or = doc_store_.Get(
+ next_best_document_hit.parent_scored_document_hit().document_id());
+ if (!document_or.ok()) {
+      // Skip the document on error.
+      ICING_LOG(WARNING) << "Failed to fetch document from document store: "
+ << document_or.status().error_message();
+ continue;
+ }
+
+ DocumentProto document = std::move(document_or).ValueOrDie();
+ // Apply parent projection
+ ApplyProjection(result_state.parent_adjustment_info(), &document);
+
+ SearchResultProto::ResultProto result;
+ // Add parent snippet if requested.
+ if (ApplySnippet(result_state.parent_adjustment_info(), *snippet_retriever_,
+ document,
+ next_best_document_hit.parent_scored_document_hit()
+ .hit_section_id_mask(),
+ &result)) {
+ ++num_results_with_snippets;
+ }
+
+    // Add the document itself.
+ *result.mutable_document() = std::move(document);
+ result.set_score(next_best_document_hit.final_score());
+
+ // Retrieve child documents
+ for (const ScoredDocumentHit& child_scored_document_hit :
+ next_best_document_hit.child_scored_document_hits()) {
+ if (result.joined_results_size() >=
+ result_state.max_joined_children_per_parent_to_return()) {
+ break;
+ }
+
+ libtextclassifier3::StatusOr<DocumentProto> child_document_or =
+ doc_store_.Get(child_scored_document_hit.document_id());
+ if (!child_document_or.ok()) {
+        // Skip the document on error.
+        ICING_LOG(WARNING)
+            << "Failed to fetch child document from document store: "
+ << child_document_or.status().error_message();
+ continue;
+ }
+
+ DocumentProto child_document = std::move(child_document_or).ValueOrDie();
+ ApplyProjection(result_state.child_adjustment_info(), &child_document);
+
+ SearchResultProto::ResultProto* child_result =
+ result.add_joined_results();
+ // Add child snippet if requested.
+ ApplySnippet(result_state.child_adjustment_info(), *snippet_retriever_,
+ child_document,
+ child_scored_document_hit.hit_section_id_mask(),
+ child_result);
+
+ *child_result->mutable_document() = std::move(child_document);
+ child_result->set_score(child_scored_document_hit.score());
+ }
+
+ size_t result_bytes = result.ByteSizeLong();
+ results.push_back(std::move(result));
+
+ // Check if num_total_bytes + result_bytes reaches or exceeds
+ // num_total_bytes_per_page_threshold. Use subtraction to avoid integer
+ // overflow.
+ if (result_bytes >=
+ result_state.num_total_bytes_per_page_threshold() - num_total_bytes) {
+ break;
+ }
+ num_total_bytes += result_bytes;
+ }
+
+ // Update numbers in ResultState
+ result_state.num_returned += results.size();
+ result_state.IncrementNumTotalHits(
+ result_state.scored_document_hits_ranker->size() -
+ original_scored_document_hits_ranker_size);
+
+ bool has_more_results = !result_state.scored_document_hits_ranker->empty();
+
+ return std::make_pair(
+ PageResult(std::move(results), num_results_with_snippets,
+ result_state.num_per_page()),
+ has_more_results);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2.h b/icing/result/result-retriever-v2.h
new file mode 100644
index 0000000..7b1a364
--- /dev/null
+++ b/icing/result/result-retriever-v2.h
@@ -0,0 +1,111 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RESULT_RETRIEVER_V2_H_
+#define ICING_RESULT_RESULT_RETRIEVER_V2_H_
+
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/result/snippet-retriever.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+class GroupResultLimiterV2 {
+ public:
+  GroupResultLimiterV2() = default;
+
+ virtual ~GroupResultLimiterV2() = default;
+
+ // Returns true if the scored_document_hit should be removed.
+ virtual bool ShouldBeRemoved(
+ const ScoredDocumentHit& scored_document_hit,
+ const std::unordered_map<int32_t, int>& entry_id_group_id_map,
+ const DocumentStore& document_store,
+ std::vector<int>& group_result_limits,
+ ResultSpecProto::ResultGroupingType result_group_type,
+ int64_t current_time_ms) const;
+};
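+
+// Note: ResultRetrieverV2::Create() below accepts a custom
+// GroupResultLimiterV2 (e.g., a mock subclass in tests, since
+// ShouldBeRemoved() is virtual); by default it constructs this stock limiter.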
+
+class ResultRetrieverV2 {
+ public:
+  // Factory function to create a ResultRetrieverV2, which does not take
+  // ownership of any input components. All pointers must refer to valid
+  // objects that outlive the created ResultRetrieverV2 instance.
+ //
+ // Returns:
+ // A ResultRetrieverV2 on success
+ // FAILED_PRECONDITION on any null pointer input
+ static libtextclassifier3::StatusOr<std::unique_ptr<ResultRetrieverV2>>
+ Create(const DocumentStore* doc_store, const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer,
+ std::unique_ptr<const GroupResultLimiterV2> group_result_limiter =
+ std::make_unique<const GroupResultLimiterV2>());
+
+  // Retrieves results (pairs of DocumentProtos and SnippetProtos) with the
+  // given ResultState, which holds document and snippet information. It pulls
+  // out the next top-ranked documents from ResultState, retrieves the
+  // documents from storage, updates ResultState, and finally wraps the
+  // results and other information into PageResult. The expected number of
+  // documents to return is min(num_per_page, the number of all scored
+  // document hits) inside ResultState.
+  //
+  // The number of snippets to return is based on the total number of snippets
+  // needed and the number of snippets that have already been returned for the
+  // same query. The order of the returned results is determined by
+  // scored_document_hit_comparator inside ResultState.
+  //
+  // An additional boolean value will be returned, indicating whether
+  // ResultState has remaining documents to be retrieved in the next round.
+  //
+  // All errors will be ignored; retrieval continues with the next document,
+  // and only valid documents are included in PageResult.
+ //
+ // Returns:
+ // std::pair<PageResult, bool>
+ std::pair<PageResult, bool> RetrieveNextPage(ResultStateV2& result_state,
+ int64_t current_time_ms) const;
+
+ private:
+ explicit ResultRetrieverV2(
+ const DocumentStore* doc_store,
+ std::unique_ptr<SnippetRetriever> snippet_retriever,
+ std::unique_ptr<const GroupResultLimiterV2> group_result_limiter)
+ : doc_store_(*doc_store),
+ snippet_retriever_(std::move(snippet_retriever)),
+ group_result_limiter_(std::move(group_result_limiter)) {}
+
+ const DocumentStore& doc_store_;
+ std::unique_ptr<SnippetRetriever> snippet_retriever_;
+ const std::unique_ptr<const GroupResultLimiterV2> group_result_limiter_;
+};
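+
+// A minimal usage sketch (illustrative only; `doc_store`, `schema_store`,
+// `language_segmenter`, and `normalizer` are assumed to be valid,
+// caller-owned components, and `result_state` an already-initialized
+// ResultStateV2):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<ResultRetrieverV2> retriever,
+//       ResultRetrieverV2::Create(doc_store, schema_store,
+//                                 language_segmenter, normalizer));
+//   auto [page_result, has_more_results] =
+//       retriever->RetrieveNextPage(result_state, current_time_ms);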
+
+} // namespace lib
+} // namespace icing
+
+#endif  // ICING_RESULT_RESULT_RETRIEVER_V2_H_
diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc
new file mode 100644
index 0000000..2914a8d
--- /dev/null
+++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc
@@ -0,0 +1,1163 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Pair;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
+
+class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
+ protected:
+ ResultRetrieverV2GroupResultLimiterTest()
+ : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ schema.add_types()->set_schema_type("Message");
+ schema.add_types()->set_schema_type("Person");
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
+ResultSpecProto CreateResultSpec(
+ int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(result_group_type);
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingShouldLimitResults) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "namespace" to a single result.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // Only the top-ranked document in "namespace" (document2) should be
+  // returned.
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+  // document1 was not returned because the GroupResultLimiter filtered it
+  // out, so there should be no more results.
+ EXPECT_FALSE(has_more_results);
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingHasEmptyFirstPage) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "namespace" to 0 results.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(0);
+ entry->set_namespace_("namespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // First page: empty page
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ ASSERT_THAT(page_result.results, IsEmpty());
+ EXPECT_FALSE(has_more_results);
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingHasEmptyLastPage) {
+ // Creates 4 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score())};
+
+ // Create a ResultSpec that limits "namespace" to 2 results.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(2);
+ entry->set_namespace_("namespace");
+
+ // Creates a ResultState with 4 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // First page: document4 and document3 should be returned.
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ ASSERT_THAT(page_result1.results, SizeIs(2));
+ EXPECT_THAT(page_result1.results.at(0).document(), EqualsProto(document4));
+ EXPECT_THAT(page_result1.results.at(1).document(), EqualsProto(document3));
+ EXPECT_TRUE(has_more_results1);
+
+  // Second page: although there are still document hits in the result state,
+  // all of them will be filtered out by the group result limiter, so we
+  // should get an empty page.
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result2.results, SizeIs(0));
+ EXPECT_FALSE(has_more_results2);
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingDoesNotLimitOtherNamespaceResults) {
+ // Creates 4 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score())};
+
+ // Create a ResultSpec that limits "namespace1" to a single result, but
+ // doesn't limit "namespace2".
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+
+ // Creates a ResultState with 4 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // All documents in "namespace2" should be returned.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document4));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document3));
+ EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document2));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingNonexistentNamespaceShouldBeIgnored) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "namespace"+"nonExistentNamespace" to a
+ // single result.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("nonexistentNamespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // Only the top-ranked document in "namespace" (document2) should be
+  // returned. The presence of "nonexistentNamespace" in the same result
+  // grouping should have no effect.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingNonexistentSchemaShouldBeIgnored) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "Document"+"nonExistentSchema" to a
+ // single result.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Document");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("nonexistentSchema");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // Only the top-ranked document in "Document" (document2) should be
+  // returned. The presence of "nonexistentSchema" in the same result
+  // grouping should have no effect.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingMultiNamespaceGrouping) {
+ // Creates 6 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Document")
+ .SetScore(5)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store_->Put(document5));
+
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Document")
+ .SetScore(6)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ document_store_->Put(document6));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()),
+ ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score()),
+ ScoredDocumentHit(document_id6, kSectionIdMaskNone, document6.score())};
+
+ // Create a ResultSpec that limits "namespace1" to a single result and limits
+ // "namespace2"+"namespace3" to a total of two results.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+
+ // Creates a ResultState with 6 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Only the top-ranked result in "namespace1" (document2) should be returned.
+ // Only the top-ranked results across "namespace2" and "namespace3"
+ // (document6, document5) should be returned.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document5));
+ EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document2));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingMultiSchemaGrouping) {
+ // Creates 6 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Person")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Person")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace", "uri/4")
+ .SetSchema("Message")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace", "uri/5")
+ .SetSchema("Document")
+ .SetScore(5)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store_->Put(document5));
+
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace", "uri/6")
+ .SetSchema("Document")
+ .SetScore(6)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ document_store_->Put(document6));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()),
+ ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score()),
+ ScoredDocumentHit(document_id6, kSectionIdMaskNone, document6.score())};
+
+ // Create a ResultSpec that limits "namespace1" to a single result and limits
+ // "namespace2"+"namespace3" to a total of two results.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("Document");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("Message");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_schema("Person");
+
+ // Creates a ResultState with 6 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Only the top-ranked result in "Document" (document6) should be returned.
+ // Only the top-ranked results across "Message" and "Person"
+ // (document5, document3) should be returned.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document4));
+ EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document3));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingMultiNamespaceAndSchemaGrouping) {
+ // Creates 6 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Message")
+ .SetScore(5)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store_->Put(document5));
+
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Message")
+ .SetScore(6)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ document_store_->Put(document6));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()),
+ ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score()),
+ ScoredDocumentHit(document_id6, kSectionIdMaskNone, document6.score())};
+
+ // Create a ResultSpec that limits "namespace1" to a single result and limits
+ // "namespace2"+"namespace3" to a total of two results.
+ ResultSpecProto result_spec = CreateResultSpec(
+ /*num_per_page=*/5, ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("namespace1");
+ entry->set_schema("Document");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry->set_schema("Document");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+ entry->set_schema("Message");
+
+ // Creates a ResultState with 6 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Only the top-ranked result in "namespace1xDocument" (document3)
+ // should be returned.
+ // Only the top-ranked results across "namespace2xDocument" and
+ // "namespace3xMessage" (document6, document5) should be returned.
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document6));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document5));
+ EXPECT_THAT(page_result.results.at(2).document(), EqualsProto(document3));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingOnlyNonexistentNamespaces) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "nonexistentNamespace" to a single result.
+ // but doesn't limit "namespace"
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_namespace_("nonexistentNamespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // All documents in "namespace" should be returned. The presence of
+ // "nonexistentNamespace" should have no effect.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document1));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ResultGroupingOnlyNonexistentSchemas) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score())};
+
+ // Create a ResultSpec that limits "nonexistentSchema" to a single result.
+ // but doesn't limit "Document"
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::SCHEMA_TYPE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(1);
+ entry->set_schema("nonexistentSchema");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // All documents in "Document" should be returned. The presence of
+ // "nonexistentDocument" should have no effect.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+ EXPECT_THAT(page_result.results.at(0).document(), EqualsProto(document2));
+ EXPECT_THAT(page_result.results.at(1).document(), EqualsProto(document1));
+}
+
+TEST_F(ResultRetrieverV2GroupResultLimiterTest,
+ ShouldUpdateResultStateCorrectlyWithGroupResultLimiter) {
+  // Creates 5 documents and ensures that, in terms of document score,
+  // document1 < document2 < document3 < document4 < document5.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace2", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store_->Put(document3));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store_->Put(document4));
+
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace2", "uri/5")
+ .SetSchema("Document")
+ .SetScore(5)
+ .SetCreationTimestampMs(1000)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store_->Put(document5));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ ScoredDocumentHit(document_id1, kSectionIdMaskNone, document1.score()),
+ ScoredDocumentHit(document_id2, kSectionIdMaskNone, document2.score()),
+ ScoredDocumentHit(document_id3, kSectionIdMaskNone, document3.score()),
+ ScoredDocumentHit(document_id4, kSectionIdMaskNone, document4.score()),
+ ScoredDocumentHit(document_id5, kSectionIdMaskNone, document5.score())};
+
+ // Create a ResultSpec that limits "namespace1" to 3 results and "namespace2"
+ // to a single result.
+ ResultSpecProto::ResultGroupingType result_grouping_type =
+ ResultSpecProto::NAMESPACE;
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, result_grouping_type);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(3);
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+
+ // Get corpus ids.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ CorpusId corpus_id1, document_store_->GetResultGroupingEntryId(
+ result_grouping_type, "namespace1", "Document"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ CorpusId corpus_id2, document_store_->GetResultGroupingEntryId(
+ result_grouping_type, "namespace2", "Document"));
+
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *document_store_);
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ ASSERT_THAT(result_state.entry_id_group_id_map(),
+ UnorderedElementsAre(Pair(corpus_id1, 0), Pair(corpus_id2, 1)));
+ ASSERT_THAT(result_state.group_result_limits, ElementsAre(3, 1));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // document5, document4, document1 belong to namespace2 (with max_results =
+  // 1).
+  // document3, document2 belong to namespace1 (with max_results = 3).
+  // Since num_per_page is 2, we expect to get document5 and document3 in the
+  // first page.
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ ASSERT_THAT(page_result1.results, SizeIs(2));
+ ASSERT_THAT(page_result1.results.at(0).document(), EqualsProto(document5));
+ ASSERT_THAT(page_result1.results.at(1).document(), EqualsProto(document3));
+ ASSERT_TRUE(has_more_results1);
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+    // Should remove document5, document4 and document3 from
+    // scored_document_hits. More than num_per_page documents are removed
+    // because document4 is filtered out by GroupResultLimiter, so
+    // ResultRetriever keeps fetching until it has returned num_per_page
+    // documents or scored_document_hits is exhausted.
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(2)));
+
+    // Even though we removed 3 document hits from scored_document_hits this
+    // round, num_returned should still be 2, since document4 was "filtered
+    // out" and should not be counted toward num_returned.
+ EXPECT_THAT(result_state.num_returned, Eq(2));
+    // entry_id_group_id_map should be unchanged.
+ EXPECT_THAT(result_state.entry_id_group_id_map(),
+ UnorderedElementsAre(Pair(corpus_id1, 0), Pair(corpus_id2, 1)));
+    // GroupResultLimiter should decrement the counts in group_result_limits.
+ EXPECT_THAT(result_state.group_result_limits, ElementsAre(2, 0));
+ }
+
+  // Although document2 and document1 remain, namespace2 has already reached
+  // its max results, so document1 should be excluded from the second page.
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ ASSERT_THAT(page_result2.results, SizeIs(1));
+ ASSERT_THAT(page_result2.results.at(0).document(), EqualsProto(document2));
+ ASSERT_FALSE(has_more_results2);
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // Should remove document2 and document1 from scored_document_hits.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty()));
+    // Even though we removed 2 document hits from scored_document_hits this
+    // round, num_returned should only be incremented by 1 (and thus become 3),
+    // since document1 was "filtered out" and should not be counted toward
+    // num_returned.
+ EXPECT_THAT(result_state.num_returned, Eq(3));
+    // entry_id_group_id_map should be unchanged.
+ EXPECT_THAT(result_state.entry_id_group_id_map(),
+ UnorderedElementsAre(Pair(corpus_id1, 0), Pair(corpus_id2, 1)));
+    // GroupResultLimiter should decrement the counts in group_result_limits.
+ EXPECT_THAT(result_state.group_result_limits, ElementsAre(1, 0));
+ }
+}
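+
+// A comment-only recap of the bookkeeping this test asserts (values taken
+// straight from the assertions above, not an independent spec):
+//   group_result_limits starts as {3, 1} for {namespace1, namespace2}.
+//   Page 1 returns document5 (namespace2) and document3 (namespace1), and
+//   drops document4 because namespace2's limit is exhausted -> {2, 0}.
+//   Page 2 returns document2 (namespace1) and drops document1 -> {1, 0},
+//   leaving num_returned at 3.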
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc
new file mode 100644
index 0000000..1a75631
--- /dev/null
+++ b/icing/result/result-retriever-v2_projection_test.cc
@@ -0,0 +1,1957 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::SizeIs;
+
+class ResultRetrieverV2ProjectionTest : public testing::Test {
+ protected:
+ ResultRetrieverV2ProjectionTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
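+    // A brief summary of the schema built below (for readability only):
+    //   Email  { name, body, sender -> Person }
+    //   Person { name, emailAddress }
+    //   Artist : Person;  Musician : Artist
+    //   WithPhone { phoneNumber, phoneModel }
+    //   PersonWithPhone : Person, WithPhone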
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Musician")
+ .AddParentType("Artist")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("WithPhone")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneModel")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("PersonWithPhone")
+ .AddParentType("Person")
+ .AddParentType("WithPhone")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneModel")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build())
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
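+
+  // Note: GetSectionId above does a linear scan over the type's section
+  // metadata and falls back to kInvalidSectionId for unknown types or
+  // properties; the tests below only look up properties defined in the
+  // schema, so the fallback is never hit here.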
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (UINT64_C(1) << section_id);
+ }
+ return mask;
+}
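+
+// Worked example (hypothetical ids): for section_ids {0, 2} the loop above
+// yields (UINT64_C(1) << 0) | (UINT64_C(1) << 2) == 0b101, i.e. one bit set
+// per hit section in a 64-bit SectionIdMask.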
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
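+
+// These three helpers build the minimal specs each projection test needs;
+// every test then attaches TypePropertyMask entries to the ResultSpec to
+// select which properties survive projection.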
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeafNodeFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("sender.name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'sender.name'
+ // property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("sender");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'sender'
+ // property and all of the subproperties of 'sender'.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("sender.name");
+ type_property_mask->add_paths("sender.emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // 5. Verify that the returned results only contain the 'sender.name' and
+  // 'sender.emailAddress' properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results contain *no* properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("nonExistentProperty");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results contain *no* properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) {
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("name");
+ type_property_mask->add_paths("nonExistentProperty");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* type_property_mask = result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+      /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results have all of their properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionMultipleTypesWildcardWithOneOverride) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("body");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'body'
+ // property and the returned Person results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
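+
+// As the two wildcard tests above show, a type-specific TypePropertyMask
+// overrides the wildcard entry: "Email" documents use the explicit "Email"
+// mask while every other type falls back to the wildcard mask.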
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionSingleTypesWildcardAndOverride) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results only contain the 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionSingleTypesWildcardAndOverrideNestedProperty) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender.name");
+ TypePropertyMask* wildcard_type_property_mask =
+ result_spec.add_type_property_masks();
+ wildcard_type_property_mask->set_schema_type(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_type_property_mask->add_paths("sender");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results contain no properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) {
+ // 1. Add one Person document
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id,
+ document_store_->Put(person_document));
+
+ // 2. Add two Email documents
+ DocumentProto email_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1,
+ document_store_->Put(email_document1));
+
+ DocumentProto email_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2,
+ document_store_->Put(email_document2));
+
+ // 3. Setup the joined scored results.
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "name"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person_scored_doc_hit(
+ person_document_id, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ // Create JoinedScoredDocumentHits mapping Person to Email1 and Email2
+ std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = {
+ JoinedScoredDocumentHit(
+ /*final_score=*/0,
+ /*parent_scored_document_hit=*/person_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit, email2_scored_doc_hit})};
+
+ // 4. Create parent ResultSpec with type property mask.
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/2);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int>::max());
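+  // Return all joined children so that both Email documents appear under the
+  // Person parent.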
+ TypePropertyMask* type_property_mask =
+ parent_result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Person");
+ type_property_mask->add_paths("name");
+
+ // 5. Create child ResultSpec with type property mask.
+ ResultSpecProto child_result_spec;
+ type_property_mask = child_result_spec.add_type_property_masks();
+ type_property_mask->set_schema_type("Email");
+ type_property_mask->add_paths("body");
+
+ // 6. Create ResultState with custom ResultSpecs.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::move(joined_scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ parent_result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 7. Verify that the returned results:
+ // - Person docs only contain the "name" property.
+ // - Email docs only contain the "body" property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+
+ // Check parent doc.
+ DocumentProto projected_person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_person_document));
+
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(2));
+ // Check Email1
+ DocumentProto projected_email_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).joined_results(0).document(),
+ EqualsProto(projected_email_document1));
+ // Check Email2
+ DocumentProto projected_email_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).joined_results(1).document(),
+ EqualsProto(projected_email_document2));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphism) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // also applies to Artist.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person and Artist results only contain the
+ // 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTransitivePolymorphism) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Musician")
+ .AddStringProperty("name", "Joe Musician")
+ .AddStringProperty("emailAddress", "Musician@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Since Musician is a transitive child type of Person, the TypePropertyMask
+ // for Person also applies to Musician.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person and Musician results only contain the
+ // 'name' property.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Musician")
+ .AddStringProperty("name", "Joe Musician")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionPolymorphismChildMissingProperty) {
+  // 1. Add an Artist document that is missing 'emailAddress', which is
+  // allowed since 'emailAddress' is defined as optional in the parent type
+  // 'Person'.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask for the missing property
+ // 'emailAddress' in the Person type. Since Artist is a child type of Person,
+ // the TypePropertyMask for Person also applies to Artist.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // 5. Verify that the returned Artist document does not contain any
+  // properties, since 'emailAddress' is missing from the document.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ DocumentProto projected_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphismMerge) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // will be merged to Artist's TypePropertyMask by polymorphism, so that 'name'
+ // will also show in Artist's projection results.
+ TypePropertyMask* artist_type_property_mask =
+ result_spec.add_type_property_masks();
+ artist_type_property_mask->set_schema_type("Artist");
+ artist_type_property_mask->add_paths("emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person results only contain the 'name'
+ // property and the returned Artist results contain both the 'name' and
+ // 'emailAddress' properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleParentPolymorphism) {
+ // 1. Add a document
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("PersonWithPhone")
+ .AddStringProperty("name", "name")
+ .AddStringProperty("emailAddress", "email")
+ .AddStringProperty("phoneNumber", "12345")
+ .AddStringProperty("phoneModel", "pixel")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
+  // Since PersonWithPhone is a child type of Person, the TypePropertyMask for
+  // Person also applies to PersonWithPhone.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+  // Since PersonWithPhone is also a child type of WithPhone, the
+  // TypePropertyMask for WithPhone also applies to PersonWithPhone.
+ TypePropertyMask* with_phone_type_property_mask =
+ result_spec.add_type_property_masks();
+ with_phone_type_property_mask->set_schema_type("WithPhone");
+ with_phone_type_property_mask->add_paths("phoneNumber");
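+  // Both parent masks apply, so their projections are unioned: 'name' comes
+  // from the Person mask and 'phoneNumber' from the WithPhone mask.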
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+  // 5. Verify that the returned document only contains the 'name' and
+  // 'phoneNumber' properties.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+
+ DocumentProto projected_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("PersonWithPhone")
+ .AddStringProperty("name", "name")
+ .AddStringProperty("phoneNumber", "12345")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc
new file mode 100644
index 0000000..440d31c
--- /dev/null
+++ b/icing/result/result-retriever-v2_snippet_test.cc
@@ -0,0 +1,1162 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits>
+#include <memory>
+#include <string_view>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/snippet-helpers.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Not;
+using ::testing::SizeIs;
+
+class ResultRetrieverV2SnippetTest : public testing::Test {
+ protected:
+ ResultRetrieverV2SnippetTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
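+  // Returns the SectionId assigned to `property` of schema `type` by linearly
+  // scanning the type's section metadata, or kInvalidSectionId if no match is
+  // found.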
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
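+// Returns a snippet spec that requests snippeting for effectively all results
+// and all matches (both limits set to INT_MAX), with a 1024-codepoint snippet
+// window.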
+ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+ snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max());
+ snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max());
+ snippet_spec.set_max_window_utf32_length(1024);
+ return snippet_spec;
+}
+
+DocumentProto CreateEmailDocument(int id) {
+ return DocumentBuilder()
+ .SetKey("icing", "Email/" + std::to_string(id))
+ .SetSchema("Email")
+ .AddStringProperty("subject", "subject foo " + std::to_string(id))
+ .AddStringProperty("body", "body bar " + std::to_string(id))
+ .SetCreationTimestampMs(1574365086666 + id)
+ .Build();
+}
+
+DocumentProto CreatePersonDocument(int id) {
+ return DocumentBuilder()
+ .SetKey("icing", "Person/" + std::to_string(id))
+ .SetSchema("Person")
+ .AddStringProperty("name", "person " + std::to_string(id))
+ .SetCreationTimestampMs(1574365086666 + id)
+ .Build();
+}
+
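+// Builds a bitmask with bit `section_id` set for each section in
+// `section_ids`, marking which sections of a document contained query hits.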
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (UINT64_C(1) << section_id);
+ }
+ return mask;
+}
+
+SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(match_type);
+ return search_spec;
+}
+
+ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
+ : ScoringSpecProto::Order::ASC);
+ return scoring_spec;
+}
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+TEST_F(ResultRetrieverV2SnippetTest,
+ DefaultSnippetSpecShouldDisableSnippeting) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
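+  // result_spec.snippet_spec is left at its default instance, which requests
+  // zero snippets, so no result on this page should be snippeted.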
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+ EXPECT_THAT(page_result.results.at(1).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+ EXPECT_THAT(page_result.results.at(2).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = CreateSnippetSpec();
+
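+  // The "" key below holds query terms with no section restrict, so "foo" and
+  // "bar" can produce snippet matches in any property.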
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
+
+ const DocumentProto& result_document_one =
+ page_result.results.at(0).document();
+ const SnippetProto& result_snippet_one = page_result.results.at(0).snippet();
+ EXPECT_THAT(result_document_one, EqualsProto(CreateEmailDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet_one.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_one, result_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("subject"));
+ content = GetString(&result_document_one,
+ result_snippet_one.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)),
+ ElementsAre("foo"));
+
+ const DocumentProto& result_document_two =
+ page_result.results.at(1).document();
+ const SnippetProto& result_snippet_two = page_result.results.at(1).snippet();
+ EXPECT_THAT(result_document_two, EqualsProto(CreateEmailDocument(/*id=*/2)));
+ EXPECT_THAT(result_snippet_two.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+ ElementsAre("body bar 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("subject"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)),
+ ElementsAre("subject foo 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)),
+ ElementsAre("foo"));
+
+ const DocumentProto& result_document_three =
+ page_result.results.at(2).document();
+ const SnippetProto& result_snippet_three =
+ page_result.results.at(2).snippet();
+ EXPECT_THAT(result_document_three,
+ EqualsProto(CreateEmailDocument(/*id=*/3)));
+ EXPECT_THAT(result_snippet_three.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)),
+ ElementsAre("body bar 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("subject"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)),
+ ElementsAre("subject foo 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)),
+ ElementsAre("foo"));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(1);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(1));
+
+ const DocumentProto& result_document = page_result.results.at(0).document();
+ const SnippetProto& result_snippet = page_result.results.at(0).snippet();
+ EXPECT_THAT(result_document, EqualsProto(CreateEmailDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&result_document, result_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("subject"));
+ content =
+ GetString(&result_document, result_snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(1)),
+ ElementsAre("foo"));
+
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(CreateEmailDocument(/*id=*/2)));
+ EXPECT_THAT(page_result.results.at(1).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+
+ EXPECT_THAT(page_result.results.at(2).document(),
+ EqualsProto(CreateEmailDocument(/*id=*/3)));
+ EXPECT_THAT(page_result.results.at(2).snippet(),
+ EqualsProto(SnippetProto::default_instance()));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+  // num_to_snippet = 5, num_previously_returned = 0, so we can return at
+  // most 5 - 0 = 5 snippets. Since this page has only 3 results, all 3 are
+  // snippeted.
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ // Set remaining_num_to_snippet = 2
+ result_state.parent_adjustment_info()->remaining_num_to_snippet = 2;
+ }
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(2));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ // Set remaining_num_to_snippet = 0
+ result_state.parent_adjustment_info()->remaining_num_to_snippet = 0;
+ }
+
+ // We can't return any snippets for this page.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest,
+ ShouldNotSnippetAnyResultsForNonPositiveNumMatchesPerProperty) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
+ snippet_spec.set_num_to_snippet(5);
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3);
+ *result_spec.mutable_snippet_spec() = std::move(snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+    // Set num_matches_per_property = 0
+ result_state.parent_adjustment_info()
+ ->snippet_context.snippet_spec.set_num_matches_per_property(0);
+ }
+
+ // We can't return any snippets for this page even though num_to_snippet > 0.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty());
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(0));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id1,
+ document_store_->Put(CreatePersonDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id2,
+ document_store_->Put(CreatePersonDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id3,
+ document_store_->Put(CreatePersonDocument(/*id=*/3)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "subject"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person3_scored_doc_hit(
+ person_document_id3, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/0);
+
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1 and Email2
+ // - Person2 to empty
+ // - Person3 to Email3
+ JoinedScoredDocumentHit joined_scored_document_hit1(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit, email2_scored_doc_hit});
+ JoinedScoredDocumentHit joined_scored_document_hit2(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/{});
+ JoinedScoredDocumentHit joined_scored_document_hit3(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person3_scored_doc_hit,
+ /*child_scored_document_hits=*/{email3_scored_doc_hit});
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create parent ResultSpec with custom snippet spec.
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ *parent_result_spec.mutable_snippet_spec() = CreateSnippetSpec();
+
+ // Create child ResultSpec with custom snippet spec.
+ ResultSpecProto child_result_spec;
+ *child_result_spec.mutable_snippet_spec() = CreateSnippetSpec();
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1,
+ joined_scored_document_hit2,
+ joined_scored_document_hit3},
+ /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"person"}}})),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ parent_result_spec, *document_store_);
+
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(3));
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(3));
+
+ // Result1: Person1 for parent and [Email1, Email2] for children.
+ // Check parent doc (Person1).
+ const DocumentProto& result_parent_document_one =
+ page_result.results.at(0).document();
+ const SnippetProto& result_parent_snippet_one =
+ page_result.results.at(0).snippet();
+ EXPECT_THAT(result_parent_document_one,
+ EqualsProto(CreatePersonDocument(/*id=*/1)));
+ ASSERT_THAT(result_parent_snippet_one.entries(), SizeIs(1));
+ EXPECT_THAT(result_parent_snippet_one.entries(0).property_name(), Eq("name"));
+ std::string_view content =
+ GetString(&result_parent_document_one,
+ result_parent_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_parent_snippet_one.entries(0)),
+ ElementsAre("person 1"));
+ EXPECT_THAT(GetMatches(content, result_parent_snippet_one.entries(0)),
+ ElementsAre("person"));
+
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(2));
+ // Check Email1.
+ const DocumentProto& result_child_document_one =
+ page_result.results.at(0).joined_results(0).document();
+ const SnippetProto& result_child_snippet_one =
+ page_result.results.at(0).joined_results(0).snippet();
+ EXPECT_THAT(result_child_document_one,
+ EqualsProto(CreateEmailDocument(/*id=*/1)));
+ ASSERT_THAT(result_child_snippet_one.entries(), SizeIs(2));
+ EXPECT_THAT(result_child_snippet_one.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_child_document_one,
+ result_child_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_one.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_one.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_child_snippet_one.entries(1).property_name(),
+ Eq("subject"));
+ content = GetString(&result_child_document_one,
+ result_child_snippet_one.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_one.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_one.entries(1)),
+ ElementsAre("foo"));
+ // Check Email2.
+ const DocumentProto& result_child_document_two =
+ page_result.results.at(0).joined_results(1).document();
+ const SnippetProto& result_child_snippet_two =
+ page_result.results.at(0).joined_results(1).snippet();
+ EXPECT_THAT(result_child_document_two,
+ EqualsProto(CreateEmailDocument(/*id=*/2)));
+ ASSERT_THAT(result_child_snippet_two.entries(), SizeIs(2));
+ EXPECT_THAT(result_child_snippet_two.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_child_document_two,
+ result_child_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_two.entries(0)),
+ ElementsAre("body bar 2"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_two.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_child_snippet_two.entries(1).property_name(),
+ Eq("subject"));
+ content = GetString(&result_child_document_two,
+ result_child_snippet_two.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_two.entries(1)),
+ ElementsAre("subject foo 2"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_two.entries(1)),
+ ElementsAre("foo"));
+
+ // Result2: Person2 for parent and [] for children.
+  // Check parent doc (Person2).
+ const DocumentProto& result_parent_document_two =
+ page_result.results.at(1).document();
+ const SnippetProto& result_parent_snippet_two =
+ page_result.results.at(1).snippet();
+ EXPECT_THAT(result_parent_document_two,
+ EqualsProto(CreatePersonDocument(/*id=*/2)));
+ ASSERT_THAT(result_parent_snippet_two.entries(), SizeIs(1));
+ EXPECT_THAT(result_parent_snippet_two.entries(0).property_name(), Eq("name"));
+ content = GetString(&result_parent_document_two,
+ result_parent_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_parent_snippet_two.entries(0)),
+ ElementsAre("person 2"));
+ EXPECT_THAT(GetMatches(content, result_parent_snippet_two.entries(0)),
+ ElementsAre("person"));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(1).joined_results(), IsEmpty());
+
+ // Result3: Person3 for parent and [Email3] for children.
+ // Check parent doc (Person3).
+ const DocumentProto& result_parent_document_three =
+ page_result.results.at(2).document();
+ const SnippetProto& result_parent_snippet_three =
+ page_result.results.at(2).snippet();
+ EXPECT_THAT(result_parent_document_three,
+ EqualsProto(CreatePersonDocument(/*id=*/3)));
+ ASSERT_THAT(result_parent_snippet_three.entries(), SizeIs(1));
+ EXPECT_THAT(result_parent_snippet_three.entries(0).property_name(),
+ Eq("name"));
+ content = GetString(&result_parent_document_three,
+ result_parent_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_parent_snippet_three.entries(0)),
+ ElementsAre("person 3"));
+ EXPECT_THAT(GetMatches(content, result_parent_snippet_three.entries(0)),
+ ElementsAre("person"));
+
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(2).joined_results(), SizeIs(1));
+ // Check Email3.
+ const DocumentProto& result_child_document_three =
+ page_result.results.at(2).joined_results(0).document();
+ const SnippetProto& result_child_snippet_three =
+ page_result.results.at(2).joined_results(0).snippet();
+ EXPECT_THAT(result_child_document_three,
+ EqualsProto(CreateEmailDocument(/*id=*/3)));
+ ASSERT_THAT(result_child_snippet_three.entries(), SizeIs(2));
+ EXPECT_THAT(result_child_snippet_three.entries(0).property_name(),
+ Eq("body"));
+ content = GetString(&result_child_document_three,
+ result_child_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_three.entries(0)),
+ ElementsAre("body bar 3"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_three.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_child_snippet_three.entries(1).property_name(),
+ Eq("subject"));
+ content = GetString(&result_child_document_three,
+ result_child_snippet_three.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_child_snippet_three.entries(1)),
+ ElementsAre("subject foo 3"));
+ EXPECT_THAT(GetMatches(content, result_child_snippet_three.entries(1)),
+ ElementsAre("foo"));
+}
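+
+// Note on the snippet helpers exercised above (behavior as used in this
+// file; the helpers themselves are defined elsewhere): GetString() returns
+// the document's content for the given property, GetWindows() extracts the
+// snippet window strings recorded in an entry, and GetMatches() extracts the
+// exact matched terms within that content.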
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id1,
+ document_store_->Put(CreatePersonDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id2,
+ document_store_->Put(CreatePersonDocument(/*id=*/2)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "subject"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/0);
+
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3
+ JoinedScoredDocumentHit joined_scored_document_hit1(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit});
+ JoinedScoredDocumentHit joined_scored_document_hit2(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email2_scored_doc_hit, email3_scored_doc_hit});
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create parent ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec();
+ parent_snippet_spec.set_num_to_snippet(1);
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec);
+
+ // Create child ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto child_snippet_spec = CreateSnippetSpec();
+ child_snippet_spec.set_num_to_snippet(3);
+ ResultSpecProto child_result_spec;
+ *child_result_spec.mutable_snippet_spec() = std::move(child_snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1,
+ joined_scored_document_hit2},
+ /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"person"}}})),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ parent_result_spec, *document_store_);
+
+ // Only 1 parent document should be snippeted, but all of the child documents
+ // should be snippeted.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ // Result1: Person1 for parent and [Email1] for children.
+ // Check parent doc (Person1).
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+
+ // Result2: Person2 for parent and [Email2, Email3] for children.
+ // Check parent doc (Person2).
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty());
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(1).joined_results(), SizeIs(2));
+ EXPECT_THAT(page_result.results.at(1).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).joined_results(1).snippet().entries(),
+ Not(IsEmpty()));
+
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(1));
+}
+
+TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id1,
+ document_store_->Put(CreatePersonDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id2,
+ document_store_->Put(CreatePersonDocument(/*id=*/2)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id1,
+ document_store_->Put(CreateEmailDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id2,
+ document_store_->Put(CreateEmailDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id3,
+ document_store_->Put(CreateEmailDocument(/*id=*/3)));
+
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "subject"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/0);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/0);
+
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3
+ JoinedScoredDocumentHit joined_scored_document_hit1(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email1_scored_doc_hit});
+ JoinedScoredDocumentHit joined_scored_document_hit2(
+ /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email2_scored_doc_hit, email3_scored_doc_hit});
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Create parent ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec();
+ parent_snippet_spec.set_num_to_snippet(3);
+ ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+ *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec);
+
+ // Create child ResultSpec with custom snippet spec.
+ ResultSpecProto::SnippetSpecProto child_snippet_spec = CreateSnippetSpec();
+ child_snippet_spec.set_num_to_snippet(2);
+ ResultSpecProto child_result_spec;
+ *child_result_spec.mutable_snippet_spec() = std::move(child_snippet_spec);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1,
+ joined_scored_document_hit2},
+ /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"person"}}})),
+ /*child_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
+ SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
+ parent_result_spec, *document_store_);
+
+  // All parent documents should be snippeted, but only 2 child documents
+  // should be snippeted.
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ // Result1: Person1 for parent and [Email1] for children.
+ // Check parent doc (Person1).
+ EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty()));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(1));
+ EXPECT_THAT(page_result.results.at(0).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+
+ // Result2: Person2 for parent and [Email2, Email3] for children.
+ // Check parent doc (Person2).
+ EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty()));
+ // Check child docs.
+ ASSERT_THAT(page_result.results.at(1).joined_results(), SizeIs(2));
+ EXPECT_THAT(page_result.results.at(1).joined_results(0).snippet().entries(),
+ Not(IsEmpty()));
+ EXPECT_THAT(page_result.results.at(1).joined_results(1).snippet().entries(),
+ IsEmpty());
+
+ EXPECT_THAT(page_result.num_results_with_snippets, Eq(2));
+}
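+
+// Summary of the two tests above (no new behavior asserted here): the parent
+// and child snippet specs apply independently. The parent spec's
+// num_to_snippet caps how many top-level results carry a parent snippet,
+// while the child spec's num_to_snippet caps how many joined child documents
+// are snippeted across the page, counted in retrieval order.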
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc
new file mode 100644
index 0000000..0bd40cc
--- /dev/null
+++ b/icing/result/result-retriever-v2_test.cc
@@ -0,0 +1,1012 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-retriever-v2.h"
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::DoDefault;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+using ::testing::Return;
+using ::testing::SizeIs;
+using EntryIdMap = std::unordered_map<int32_t, int>;
+
+// Mock the behavior of GroupResultLimiterV2::ShouldBeRemoved.
+class MockGroupResultLimiter : public GroupResultLimiterV2 {
+ public:
+ MockGroupResultLimiter() : GroupResultLimiterV2() {
+ ON_CALL(*this, ShouldBeRemoved).WillByDefault(Return(false));
+ }
+
+ MOCK_METHOD(bool, ShouldBeRemoved,
+ (const ScoredDocumentHit&, const EntryIdMap&,
+ const DocumentStore&, std::vector<int>&,
+ ResultSpecProto::ResultGroupingType, int64_t),
+ (const, override));
+};
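+
+// Usage sketch (illustrative only; this expectation is hypothetical and not
+// set anywhere in this test suite): a test can override the default to make
+// the limiter drop every candidate hit.
+//
+//   auto limiter = std::make_unique<MockGroupResultLimiter>();
+//   EXPECT_CALL(*limiter, ShouldBeRemoved).WillRepeatedly(Return(true));
+//   // Pass std::move(limiter) as the last argument to
+//   // ResultRetrieverV2::Create() so every hit is removed before retrieval.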
+
+class ResultRetrieverV2Test : public ::testing::Test {
+ protected:
+ ResultRetrieverV2Test() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ num_total_hits_ = 0;
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::atomic<int> num_total_hits_;
+ FakeClock fake_clock_;
+};
+
+DocumentProto CreateDocument(int id) {
+ return DocumentBuilder()
+ .SetKey("icing", "Email/" + std::to_string(id))
+ .SetSchema("Email")
+ .AddStringProperty("name", "subject foo " + std::to_string(id))
+ .AddStringProperty("body", "body bar " + std::to_string(id))
+ .SetCreationTimestampMs(1574365086666 + id)
+ .Build();
+}
+
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (UINT64_C(1) << section_id);
+ }
+ return mask;
+}
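+
+// For illustration (example values assumed, not taken from any test below):
+// with section ids {0, 2}, the loop above sets bits 0 and 2, so
+//   CreateSectionIdMask({0, 2}) == (UINT64_C(1) << 0) | (UINT64_C(1) << 2)
+//                               == 0b101 == 5.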
+
+ResultSpecProto CreateResultSpec(
+ int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(result_group_type);
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
+TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(
+ ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(
+ ResultRetrieverV2::Create(doc_store.get(), /*schema_store=*/nullptr,
+ language_segmenter_.get(), normalizer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ /*language_segmenter=*/nullptr,
+ normalizer_.get()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(),
+ /*normalizer=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ doc_store->Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ doc_store->Put(CreateDocument(/*id=*/5)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/19},
+ {document_id2, hit_section_id_mask, /*score=*/12},
+ {document_id3, hit_section_id_mask, /*score=*/8},
+ {document_id4, hit_section_id_mask, /*score=*/3},
+ {document_id5, hit_section_id_mask, /*score=*/1}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(19);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(12);
+ SearchResultProto::ResultProto result3;
+ *result3.mutable_document() = CreateDocument(/*id=*/3);
+ result3.set_score(8);
+ SearchResultProto::ResultProto result4;
+ *result4.mutable_document() = CreateDocument(/*id=*/4);
+ result4.set_score(3);
+ SearchResultProto::ResultProto result5;
+ *result5.mutable_document() = CreateDocument(/*id=*/5);
+ result5.set_score(1);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
+ *doc_store);
+
+ // First page, 2 results
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result1.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+ // num_results_with_snippets is 0 when there is no snippet.
+ EXPECT_THAT(page_result1.num_results_with_snippets, Eq(0));
+ // Requested page size is same as num_per_page.
+ EXPECT_THAT(page_result1.requested_page_size, Eq(2));
+ // Has more results.
+ EXPECT_TRUE(has_more_results1);
+
+ // Second page, 2 results
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result2.results,
+ ElementsAre(EqualsProto(result3), EqualsProto(result4)));
+ // num_results_with_snippets is 0 when there is no snippet.
+ EXPECT_THAT(page_result2.num_results_with_snippets, Eq(0));
+ // Requested page size is same as num_per_page.
+ EXPECT_THAT(page_result2.requested_page_size, Eq(2));
+ // Has more results.
+ EXPECT_TRUE(has_more_results2);
+
+ // Third page, 1 result
+ auto [page_result3, has_more_results3] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result3.results, ElementsAre(EqualsProto(result5)));
+ // num_results_with_snippets is 0 when there is no snippet.
+ EXPECT_THAT(page_result3.num_results_with_snippets, Eq(0));
+ // Requested page size is same as num_per_page.
+ EXPECT_THAT(page_result3.requested_page_size, Eq(2));
+ // No more results.
+ EXPECT_FALSE(has_more_results3);
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ DocumentId invalid_document_id = -1;
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/12},
+ {document_id2, hit_section_id_mask, /*score=*/4},
+ {invalid_document_id, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ std::make_unique<MockGroupResultLimiter>()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(12);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(4);
+
+ ResultStateV2 result_state1(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
+ *doc_store);
+ PageResult page_result1 =
+ result_retriever
+ ->RetrieveNextPage(result_state1,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ EXPECT_THAT(page_result1.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+
+ DocumentId non_existing_document_id = 4;
+ scored_document_hits = {
+ {non_existing_document_id, hit_section_id_mask, /*score=*/15},
+ {document_id1, hit_section_id_mask, /*score=*/12},
+ {document_id2, hit_section_id_mask, /*score=*/4}};
+ ResultStateV2 result_state2(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
+ *doc_store);
+ PageResult page_result2 =
+ result_retriever
+ ->RetrieveNextPage(result_state2,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ EXPECT_THAT(page_result2.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+}
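+
+// Note (behavior demonstrated above): non-internal lookup failures, such as
+// those for the invalid id (-1) and the non-existing id (4), are silently
+// skipped, so the page is built from the remaining valid documents.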
+
+TEST_F(ResultRetrieverV2Test,
+ ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+  // 1. Add 2 Person documents
+ DocumentProto person_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id1,
+ doc_store->Put(person_document1));
+
+ DocumentProto person_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id2,
+ doc_store->Put(person_document2));
+
+ // 2. Add 4 Email documents
+ DocumentProto email_document1 = DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 1")
+ .AddStringProperty("body", "Test 1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1,
+ doc_store->Put(email_document1));
+
+ DocumentProto email_document2 = DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 2")
+ .AddStringProperty("body", "Test 2")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2,
+ doc_store->Put(email_document2));
+
+ DocumentProto email_document3 = DocumentBuilder()
+ .SetKey("namespace", "Email/3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 3")
+ .AddStringProperty("body", "Test 3")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id3,
+ doc_store->Put(email_document3));
+
+ DocumentProto email_document4 = DocumentBuilder()
+ .SetKey("namespace", "Email/4")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 4")
+ .AddStringProperty("body", "Test 4")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id4,
+ doc_store->Put(email_document4));
+
+ // 3. Setup the joined scored results.
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "name"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/1);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/2);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/3);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/4);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/5);
+ ScoredDocumentHit email4_scored_doc_hit(
+ email_document_id4, email_hit_section_id_mask, /*score=*/6);
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3, Email4
+ std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = {
+ JoinedScoredDocumentHit(
+ /*final_score=*/1,
+ /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/{email1_scored_doc_hit}),
+ JoinedScoredDocumentHit(
+ /*final_score=*/3,
+ /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email4_scored_doc_hit, email3_scored_doc_hit,
+ email2_scored_doc_hit})};
+
+ // 4. Retrieve result with max_joined_children_per_parent_to_return = 2.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_max_joined_children_per_parent_to_return(2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::move(joined_scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
+
+ // Result1: person2 with child docs = [email4, email3]
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = person_document2;
+ result1.set_score(3);
+ SearchResultProto::ResultProto* child1 = result1.add_joined_results();
+ *child1->mutable_document() = email_document4;
+ child1->set_score(6);
+ SearchResultProto::ResultProto* child2 = result1.add_joined_results();
+ *child2->mutable_document() = email_document3;
+ child2->set_score(5);
+
+ // Result2: person1 with child docs = [email1]
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = person_document1;
+ result2.set_score(1);
+ SearchResultProto::ResultProto* child3 = result2.add_joined_results();
+ *child3->mutable_document() = email_document1;
+ child3->set_score(3);
+
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+ // No more results.
+ EXPECT_FALSE(has_more_results);
+}
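+
+// Note (derived from the expectations above): with
+// max_joined_children_per_parent_to_return set to 2, only the first two child
+// hits supplied for person2 (email4 and email3) are returned, and email2 is
+// trimmed from the joined results.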
+
+TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
+ MockFilesystem mock_filesystem;
+ EXPECT_CALL(mock_filesystem,
+ PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>()))
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&mock_filesystem, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get(),
+ std::make_unique<MockGroupResultLimiter>()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(0);
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
+ *doc_store);
+ PageResult page_result =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+  // We mocked the filesystem to return an internal error when retrieving doc2,
+ // so doc2 should be skipped and doc1 should still be returned.
+ EXPECT_THAT(page_result.results, ElementsAre(EqualsProto(result1)));
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ doc_store->Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ doc_store->Put(CreateDocument(/*id=*/5)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0},
+ {document_id4, hit_section_id_mask, /*score=*/0},
+ {document_id5, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
+ *doc_store);
+
+ // First page, 2 results
+ PageResult page_result1 =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result1.results, SizeIs(2));
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // num_returned = size of first page
+ EXPECT_THAT(result_state.num_returned, Eq(2));
+ // Should remove the 2 returned docs from scored_document_hits and only
+ // contain the remaining 3.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(3)));
+ }
+
+ // Second page, 2 results
+ PageResult page_result2 =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result2.results, SizeIs(2));
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // num_returned = size of first and second pages
+ EXPECT_THAT(result_state.num_returned, Eq(4));
+ // Should remove the 2 returned docs from scored_document_hits and only
+ // contain the remaining 1.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(1)));
+ }
+
+ // Third page, 1 result
+ PageResult page_result3 =
+ result_retriever
+ ->RetrieveNextPage(result_state,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result3.results, SizeIs(1));
+ {
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // num_returned = size of first, second and third pages
+ EXPECT_THAT(result_state.num_returned, Eq(5));
+ // Should remove the 1 returned doc from scored_document_hits and become
+ // empty.
+ EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty()));
+ }
+}
+
+TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+ std::vector<ScoredDocumentHit> scored_document_hits1 = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+ std::shared_ptr<ResultStateV2> result_state1 =
+ std::make_shared<ResultStateV2>(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ *doc_store);
+ {
+ absl_ports::unique_lock l(&result_state1->mutex);
+
+ result_state1->RegisterNumTotalHits(&num_total_hits_);
+ ASSERT_THAT(num_total_hits_, Eq(2));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ doc_store->Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ doc_store->Put(CreateDocument(/*id=*/5)));
+ std::vector<ScoredDocumentHit> scored_document_hits2 = {
+ {document_id3, hit_section_id_mask, /*score=*/0},
+ {document_id4, hit_section_id_mask, /*score=*/0},
+ {document_id5, hit_section_id_mask, /*score=*/0}};
+ std::shared_ptr<ResultStateV2> result_state2 =
+ std::make_shared<ResultStateV2>(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
+ *doc_store);
+ {
+ absl_ports::unique_lock l(&result_state2->mutex);
+
+ result_state2->RegisterNumTotalHits(&num_total_hits_);
+ ASSERT_THAT(num_total_hits_, Eq(5));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // Should get 1 doc in the first page of result_state1, and num_total_hits
+ // should be decremented by 1.
+ PageResult page_result1 =
+ result_retriever
+ ->RetrieveNextPage(*result_state1,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result1.results, SizeIs(1));
+ EXPECT_THAT(num_total_hits_, Eq(4));
+
+ // Should get 2 docs in the first page of result_state2, and num_total_hits
+ // should be decremented by 2.
+ PageResult page_result2 =
+ result_retriever
+ ->RetrieveNextPage(*result_state2,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result2.results, SizeIs(2));
+ EXPECT_THAT(num_total_hits_, Eq(2));
+
+ // Should get 1 doc in the second page of result_state2 (although num_per_page
+ // is 2, there is only 1 doc left), and num_total_hits should be decremented
+ // by 1.
+ PageResult page_result3 =
+ result_retriever
+ ->RetrieveNextPage(*result_state2,
+ fake_clock_.GetSystemTimeMilliseconds())
+ .first;
+ ASSERT_THAT(page_result3.results, SizeIs(1));
+ EXPECT_THAT(num_total_hits_, Eq(1));
+
+ // Destruct result_state1. There is 1 doc left, so num_total_hits should be
+ // decremented by 1 when destructing it.
+ result_state1.reset();
+ EXPECT_THAT(num_total_hits_, Eq(0));
+
+  // Destruct result_state2. There are no docs left, so num_total_hits should
+  // be unchanged when destructing it.
+  result_state2.reset();
+ EXPECT_THAT(num_total_hits_, Eq(0));
+}
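+
+// Note (accounting demonstrated above): each ResultStateV2 registers its
+// remaining hit count with the shared num_total_hits_ counter. Retrieving a
+// page decrements the counter by the number of results returned, and
+// destructing a state releases whatever count remains.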
+
+TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/5},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(5);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(0);
+
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_num_total_bytes_per_page_threshold(result1.ByteSizeLong());
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
+
+  // First page. Only result1 should be returned: its byte size alone reaches
+  // num_total_bytes_per_page_threshold, so ResultRetriever terminates early
+  // even though the number of results is still below num_per_page.
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result1.results, ElementsAre(EqualsProto(result1)));
+ // Has more results.
+ EXPECT_TRUE(has_more_results1);
+
+ // Second page, result2.
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result2.results, ElementsAre(EqualsProto(result2)));
+ // No more results.
+ EXPECT_FALSE(has_more_results2);
+}
+
+TEST_F(ResultRetrieverV2Test,
+ ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/5},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(5);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(0);
+
+ int threshold = 1;
+ ASSERT_THAT(result1.ByteSizeLong(), Gt(threshold));
+
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_num_total_bytes_per_page_threshold(threshold);
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
+
+ // First page. Should return single result1 even though its byte size exceeds
+ // num_total_bytes_per_page_threshold.
+ auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result1.results, ElementsAre(EqualsProto(result1)));
+ // Has more results.
+ EXPECT_TRUE(has_more_results1);
+
+ // Second page, result2.
+ auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result2.results, ElementsAre(EqualsProto(result2)));
+ // No more results.
+ EXPECT_FALSE(has_more_results2);
+}
+
+TEST_F(ResultRetrieverV2Test,
+ ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(CreateDocument(/*id=*/2)));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/5},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(5);
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(0);
+
+ int threshold = result1.ByteSizeLong() + 1;
+ ASSERT_THAT(result1.ByteSizeLong() + result2.ByteSizeLong(), Gt(threshold));
+
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_num_total_bytes_per_page_threshold(threshold);
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
+
+  // After retrieving result1, the total bytes are still below the threshold
+  // and the number of results is still below num_per_page, so ResultRetriever
+  // should continue retrieval and include result2 in this page, even though
+  // the combined bytes of result1 + result2 end up exceeding the threshold.
+ auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
+ result_state, fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(page_result.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+ // No more results.
+ EXPECT_FALSE(has_more_results);
+}
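+
+// Summary of the three byte-threshold tests above (no new behavior asserted
+// here): num_total_bytes_per_page_threshold is a soft cut-off. Retrieval
+// stops once the accumulated page size reaches the threshold, but a result is
+// never split: the result that crosses the threshold is returned whole, so a
+// page always contains at least one result.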
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
deleted file mode 100644
index f09d834..0000000
--- a/icing/result/result-retriever.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/result/result-retriever.h"
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/result/page-result-state.h"
-#include "icing/result/snippet-context.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>>
-ResultRetriever::Create(const DocumentStore* doc_store,
- const SchemaStore* schema_store,
- const LanguageSegmenter* language_segmenter,
- const Normalizer* normalizer,
- bool ignore_bad_document_ids) {
- ICING_RETURN_ERROR_IF_NULL(doc_store);
- ICING_RETURN_ERROR_IF_NULL(schema_store);
- ICING_RETURN_ERROR_IF_NULL(language_segmenter);
-
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<SnippetRetriever> snippet_retriever,
- SnippetRetriever::Create(schema_store, language_segmenter, normalizer));
-
- return std::unique_ptr<ResultRetriever>(new ResultRetriever(
- doc_store, std::move(snippet_retriever), ignore_bad_document_ids));
-}
-
-libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
-ResultRetriever::RetrieveResults(
- const PageResultState& page_result_state) const {
- std::vector<SearchResultProto::ResultProto> search_results;
- search_results.reserve(page_result_state.scored_document_hits.size());
-
- const SnippetContext& snippet_context = page_result_state.snippet_context;
- // Calculates how many snippets to return for this page.
- int remaining_num_to_snippet = snippet_context.snippet_spec.num_to_snippet() -
- page_result_state.num_previously_returned;
-
- if (remaining_num_to_snippet < 0) {
- remaining_num_to_snippet = 0;
- }
-
- for (const auto& scored_document_hit :
- page_result_state.scored_document_hits) {
- libtextclassifier3::StatusOr<DocumentProto> document_or =
- doc_store_.Get(scored_document_hit.document_id());
-
- if (!document_or.ok()) {
- // Internal errors from document store are IO errors, return directly.
- if (absl_ports::IsInternal(document_or.status())) {
- return document_or.status();
- }
-
- if (ignore_bad_document_ids_) {
- continue;
- } else {
- return document_or.status();
- }
- }
-
- SearchResultProto::ResultProto result;
- // Add the snippet if requested.
- if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
- remaining_num_to_snippet > search_results.size()) {
- SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet(
- snippet_context.query_terms, snippet_context.match_type,
- snippet_context.snippet_spec, document_or.ValueOrDie(),
- scored_document_hit.hit_section_id_mask());
- *result.mutable_snippet() = std::move(snippet_proto);
- }
-
- // Add the document, itself.
- *result.mutable_document() = std::move(document_or).ValueOrDie();
- search_results.push_back(std::move(result));
- }
- return search_results;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/result/result-retriever.h b/icing/result/result-retriever.h
deleted file mode 100644
index 6f33eef..0000000
--- a/icing/result/result-retriever.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_RESULT_RETRIEVER_H_
-#define ICING_RESULT_RETRIEVER_H_
-
-#include <utility>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/query/query-terms.h"
-#include "icing/result/page-result-state.h"
-#include "icing/result/snippet-context.h"
-#include "icing/result/snippet-retriever.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
-#include "icing/scoring/scored-document-hit.h"
-#include "icing/store/document-id.h"
-#include "icing/store/document-store.h"
-#include "icing/tokenization/language-segmenter.h"
-#include "icing/transform/normalizer.h"
-
-namespace icing {
-namespace lib {
-
-class ResultRetriever {
- public:
- // Factory function to create a ResultRetriever which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created ResultRetriever instance.
- //
- // Returns:
- // A ResultRetriever on success
- // FAILED_PRECONDITION on any null pointer input
- static libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>> Create(
- const DocumentStore* doc_store, const SchemaStore* schema_store,
- const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
- bool ignore_bad_document_ids = true);
-
- // Retrieves results (pairs of DocumentProtos and SnippetProtos) with the
- // given document and snippet information. The expected number of documents to
- // return is the number of all scored document hits inside PageResultState.
- // The number of snippets to return is based on the total number of snippets
- // needed and number of snippets that have already been returned previously
- // for the same query. The order of results returned is the same as the order
- // of scored document hits inside PageResultState.
- //
- // "ignore_bad_document_ids" from constructor indicates whether to ignore
- // invalid and non-existing document ids. If it's true, errors on some
- // document ids will be ignored and valid documents will be returned,
- // otherwise any error will be returned immediately. Note that IO errors will
- // always be returned.
- //
- // Returns when ignore_bad_document_ids is true:
- // A list of ResultProto on success
- // INTERNAL_ERROR on IO error
- //
- // Returns when ignore_bad_document_ids is false:
- // A list of ResultProto on success
- // INVALID_ARGUMENT if any document_id < 0
- // NOT_FOUND if any doc doesn't exist or has been deleted
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
- RetrieveResults(const PageResultState& page_result_state) const;
-
- private:
- explicit ResultRetriever(const DocumentStore* doc_store,
- std::unique_ptr<SnippetRetriever> snippet_retriever,
- bool ignore_bad_document_ids)
- : doc_store_(*doc_store),
- snippet_retriever_(std::move(snippet_retriever)),
- ignore_bad_document_ids_(ignore_bad_document_ids) {}
-
- const DocumentStore& doc_store_;
- std::unique_ptr<SnippetRetriever> snippet_retriever_;
- const bool ignore_bad_document_ids_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_RESULT_RETRIEVER_H_
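Taken together, the deleted header above defined a two-step flow: build the retriever through the factory, then fetch one page at a time. A hedged usage sketch based only on the deleted signatures (all pointers are assumed valid and to outlive the retriever, per the factory contract; error propagation uses the icing macros seen elsewhere in this diff):

// Sketch only: doc_store, schema_store, segmenter, normalizer and
// page_result_state are assumed to be valid, already-initialized objects.
ICING_ASSIGN_OR_RETURN(
    std::unique_ptr<ResultRetriever> retriever,
    ResultRetriever::Create(doc_store, schema_store, segmenter, normalizer,
                            /*ignore_bad_document_ids=*/true));
ICING_ASSIGN_OR_RETURN(
    std::vector<SearchResultProto::ResultProto> results,
    retriever->RetrieveResults(page_result_state));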
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
deleted file mode 100644
index 36dbfd9..0000000
--- a/icing/result/result-retriever_test.cc
+++ /dev/null
@@ -1,586 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/result/result-retriever.h"
-
-#include <limits>
-#include <memory>
-
-#include "gtest/gtest.h"
-#include "icing/document-builder.h"
-#include "icing/file/mock-filesystem.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
-#include "icing/portable/equals-proto.h"
-#include "icing/proto/document.pb.h"
-#include "icing/proto/schema.pb.h"
-#include "icing/proto/search.pb.h"
-#include "icing/proto/term.pb.h"
-#include "icing/schema/schema-store.h"
-#include "icing/store/document-id.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/testing/fake-clock.h"
-#include "icing/testing/snippet-helpers.h"
-#include "icing/testing/test-data.h"
-#include "icing/testing/tmp-directory.h"
-#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/transform/normalizer-factory.h"
-#include "icing/transform/normalizer.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::IsEmpty;
-using ::testing::Return;
-using ::testing::SizeIs;
-
-class ResultRetrieverTest : public testing::Test {
- protected:
- ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") {
- filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- }
-
- void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
- ICING_ASSERT_OK_AND_ASSIGN(language_segmenter_,
- language_segmenter_factory::Create());
-
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
- /*max_term_byte_size=*/10000));
-
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- PropertyConfigProto* prop_config = type_config->add_properties();
- prop_config->set_property_name("subject");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- prop_config = type_config->add_properties();
- prop_config->set_property_name("body");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
- }
-
- void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
- }
-
- const Filesystem filesystem_;
- const std::string test_dir_;
- std::unique_ptr<LanguageSegmenter> language_segmenter_;
- std::unique_ptr<SchemaStore> schema_store_;
- std::unique_ptr<Normalizer> normalizer_;
- FakeClock fake_clock_;
-};
-
-ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
- ResultSpecProto::SnippetSpecProto snippet_spec;
- snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max());
- snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max());
- snippet_spec.set_max_window_bytes(1024);
- return snippet_spec;
-}
-
-DocumentProto CreateDocument(int id) {
- return DocumentBuilder()
- .SetKey("icing", "email/" + std::to_string(id))
- .SetSchema("email")
- .AddStringProperty("subject", "subject foo " + std::to_string(id))
- .AddStringProperty("body", "body bar " + std::to_string(id))
- .SetCreationTimestampMs(1574365086666 + id)
- .Build();
-}
-
-TEST_F(ResultRetrieverTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- ResultRetriever::Create(/*doc_store=*/nullptr, schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
-
- EXPECT_THAT(
- ResultRetriever::Create(doc_store.get(), /*schema_store=*/nullptr,
- language_segmenter_.get(), normalizer_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- /*language_segmenter=*/nullptr,
- normalizer_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(),
- /*normalizer=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
-
-TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SearchResultProto::ResultProto result1;
- *result1.mutable_document() = CreateDocument(/*id=*/1);
- SearchResultProto::ResultProto result2;
- *result2.mutable_document() = CreateDocument(/*id=*/2);
- SearchResultProto::ResultProto result3;
- *result3.mutable_document() = CreateDocument(/*id=*/3);
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- EXPECT_THAT(
- result_retriever->RetrieveResults(page_result_state),
- IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2),
- EqualsProto(result3))));
-}
-
-TEST_F(ResultRetrieverTest, IgnoreErrors) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
-
- DocumentId invalid_document_id = -1;
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get(),
- /*ignore_bad_document_ids=*/true));
-
- SearchResultProto::ResultProto result1;
- *result1.mutable_document() = CreateDocument(/*id=*/1);
- SearchResultProto::ResultProto result2;
- *result2.mutable_document() = CreateDocument(/*id=*/2);
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- EXPECT_THAT(
- result_retriever->RetrieveResults(page_result_state),
- IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2))));
-}
-
-TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
-
- DocumentId invalid_document_id = -1;
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get(),
- /*ignore_bad_document_ids=*/false));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- DocumentId non_existing_document_id = 4;
- page_result_state.scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {non_existing_document_id, /*hit_section_id_mask=*/0b00000011,
- /*score=*/0}};
- EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) {
- MockFilesystem mock_filesystem;
- ON_CALL(mock_filesystem, OpenForRead(_)).WillByDefault(Return(false));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get(),
- /*ignore_bad_document_ids=*/true));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
-TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> results,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(results, SizeIs(3));
- EXPECT_THAT(results.at(0).snippet(),
- EqualsProto(SnippetProto::default_instance()));
- EXPECT_THAT(results.at(1).snippet(),
- EqualsProto(SnippetProto::default_instance()));
- EXPECT_THAT(results.at(2).snippet(),
- EqualsProto(SnippetProto::default_instance()));
-}
-
-TEST_F(ResultRetrieverTest, SimpleSnippeted) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, CreateSnippetSpec(),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- EXPECT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(
- GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
- Eq("body bar 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
- Eq("bar"));
-
- EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(
- GetWindow(result[1].document(), result[1].snippet(), "subject", 0),
- Eq("subject foo 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "subject", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0),
- Eq("body bar 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "body", 0),
- Eq("bar"));
-
- EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(
- GetWindow(result[2].document(), result[2].snippet(), "subject", 0),
- Eq("subject foo 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "subject", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0),
- Eq("body bar 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "body", 0),
- Eq("bar"));
-}
-
-TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(1);
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- SnippetContext snippet_context(/*query_terms_in=*/{{"", {"foo", "bar"}}},
- snippet_spec, TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- EXPECT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(
- GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
- Eq("body bar 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
- Eq("bar"));
-
- EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(result[1].snippet(),
- EqualsProto(SnippetProto::default_instance()));
-
- EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(result[2].snippet(),
- EqualsProto(SnippetProto::default_instance()));
-}
-
-TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(5);
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- // num_to_snippet = 5, num_previously_returned_in = 0,
- // We can return 5 - 0 = 5 snippets at most. We're able to return all 3
- // snippets here.
- ASSERT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[1].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[2].snippet().entries(), Not(IsEmpty()));
-}
-
-TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(5);
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/3);
-
- // num_to_snippet = 5, num_previously_returned_in = 3,
- // We can return 5 - 3 = 2 snippets.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[1].snippet().entries(), Not(IsEmpty()));
- EXPECT_THAT(result[2].snippet().entries(), IsEmpty());
-}
-
-TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- doc_store->Put(CreateDocument(/*id=*/1)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- doc_store->Put(CreateDocument(/*id=*/2)));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- doc_store->Put(CreateDocument(/*id=*/3)));
-
- std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<ResultRetriever> result_retriever,
- ResultRetriever::Create(doc_store.get(), schema_store_.get(),
- language_segmenter_.get(), normalizer_.get()));
-
- ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
- snippet_spec.set_num_to_snippet(5);
- SnippetContext snippet_context(
- /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec),
- TermMatchType::EXACT_ONLY);
- PageResultState page_result_state(
- std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/6);
-
- // num_to_snippet = 5, num_previously_returned_in = 6,
- // We can't return any snippets for this page.
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<SearchResultProto::ResultProto> result,
- result_retriever->RetrieveResults(page_result_state));
- ASSERT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].snippet().entries(), IsEmpty());
- EXPECT_THAT(result[1].snippet().entries(), IsEmpty());
- EXPECT_THAT(result[2].snippet().entries(), IsEmpty());
-}
-
-} // namespace
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index e9ae0ab..382f7db 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -14,95 +14,134 @@
#include "icing/result/result-state-manager.h"
-#include "icing/proto/search.pb.h"
+#include <memory>
+#include <queue>
+#include <utility>
+
+#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
#include "icing/util/clock.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-ResultStateManager::ResultStateManager(int max_hits_per_query,
- int max_result_states)
- : max_hits_per_query_(max_hits_per_query),
- max_result_states_(max_result_states),
+ResultStateManager::ResultStateManager(int max_total_hits,
+ const DocumentStore& document_store)
+ : document_store_(document_store),
+ max_total_hits_(max_total_hits),
+ num_total_hits_(0),
random_generator_(GetSteadyTimeNanoseconds()) {}
-libtextclassifier3::StatusOr<PageResultState>
-ResultStateManager::RankAndPaginate(ResultState result_state) {
- if (!result_state.HasMoreResults()) {
- return absl_ports::InvalidArgumentError("ResultState has no results");
+libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
+ResultStateManager::CacheAndRetrieveFirstPage(
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store,
+ const ResultRetrieverV2& result_retriever, int64_t current_time_ms) {
+ if (ranker == nullptr) {
+ return absl_ports::InvalidArgumentError("Should not provide null ranker");
}
- // Truncates scored document hits so that they don't take up too much space.
- result_state.TruncateHitsTo(max_hits_per_query_);
-
- // Gets the number before calling GetNextPage() because num_returned() may
- // change after returning more results.
- int num_previously_returned = result_state.num_returned();
-
- std::vector<ScoredDocumentHit> page_result_document_hits =
- result_state.GetNextPage();
-
- if (!result_state.HasMoreResults()) {
+ // Create a shared pointer of ResultState.
+ // ResultState should be created by ResultStateManager only.
+ std::shared_ptr<ResultStateV2> result_state = std::make_shared<ResultStateV2>(
+ std::move(ranker), std::move(parent_adjustment_info),
+ std::move(child_adjustment_info), result_spec, document_store);
+
+ // Retrieve docs outside of ResultStateManager critical section.
+ // Will enter ResultState critical section inside ResultRetriever.
+ auto [page_result, has_more_results] =
+ result_retriever.RetrieveNextPage(*result_state, current_time_ms);
+ if (!has_more_results) {
// No more pages, won't store ResultState, returns directly
- return PageResultState(
- std::move(page_result_document_hits), kInvalidNextPageToken,
- result_state.snippet_context(), num_previously_returned);
+ return std::make_pair(kInvalidNextPageToken, std::move(page_result));
}
- absl_ports::unique_lock l(&mutex_);
-
// ResultState has multiple pages, storing it
- SnippetContext snippet_context_copy = result_state.snippet_context();
- uint64_t next_page_token = Add(std::move(result_state));
+ int num_hits_to_add = 0;
+ {
+ // ResultState critical section
+ absl_ports::unique_lock l(&result_state->mutex);
+
+ result_state->scored_document_hits_ranker->TruncateHitsTo(max_total_hits_);
+ result_state->RegisterNumTotalHits(&num_total_hits_);
+ num_hits_to_add = result_state->scored_document_hits_ranker->size();
+ }
- return PageResultState(std::move(page_result_document_hits), next_page_token,
- std::move(snippet_context_copy),
- num_previously_returned);
-}
+ // It is fine to exit the ResultState critical section here: the state was
+ // just created above and only this thread (this call stack) has access to
+ // it, so it cannot change during the gap before we enter the
+ // ResultStateManager critical section.
+ uint64_t next_page_token = kInvalidNextPageToken;
+ {
+ // ResultStateManager critical section
+ absl_ports::unique_lock l(&mutex_);
+
+ // Remove expired result states first.
+ InternalInvalidateExpiredResultStates(kDefaultResultStateTtlInMs,
+ current_time_ms);
+ // Remove states to make room for this new state.
+ RemoveStatesIfNeeded(num_hits_to_add);
+ // Generate a new unique token and add it into result_state_map_.
+ next_page_token = Add(std::move(result_state), current_time_ms);
+ }
-uint64_t ResultStateManager::Add(ResultState result_state) {
- RemoveStatesIfNeeded();
+ return std::make_pair(next_page_token, std::move(page_result));
+}
+uint64_t ResultStateManager::Add(std::shared_ptr<ResultStateV2> result_state,
+ int64_t current_time_ms) {
uint64_t new_token = GetUniqueToken();
result_state_map_.emplace(new_token, std::move(result_state));
// Tracks the insertion order
- token_queue_.push(new_token);
+ token_queue_.push(std::make_pair(new_token, current_time_ms));
return new_token;
}
-libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage(
- uint64_t next_page_token) {
- absl_ports::unique_lock l(&mutex_);
-
- const auto& state_iterator = result_state_map_.find(next_page_token);
- if (state_iterator == result_state_map_.end()) {
- return absl_ports::NotFoundError("next_page_token not found");
+libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
+ResultStateManager::GetNextPage(uint64_t next_page_token,
+ const ResultRetrieverV2& result_retriever,
+ int64_t current_time_ms) {
+ std::shared_ptr<ResultStateV2> result_state = nullptr;
+ {
+ // ResultStateManager critical section
+ absl_ports::unique_lock l(&mutex_);
+
+ // Remove expired result states before fetching
+ InternalInvalidateExpiredResultStates(kDefaultResultStateTtlInMs,
+ current_time_ms);
+
+ const auto& state_iterator = result_state_map_.find(next_page_token);
+ if (state_iterator == result_state_map_.end()) {
+ return absl_ports::NotFoundError("next_page_token not found");
+ }
+ result_state = state_iterator->second;
}
- int num_returned = state_iterator->second.num_returned();
- std::vector<ScoredDocumentHit> result_of_page =
- state_iterator->second.GetNextPage();
- if (result_of_page.empty()) {
- // This shouldn't happen, all our active states should contain results, but
- // a sanity check here in case of any data inconsistency.
- InternalInvalidateResultState(next_page_token);
- return absl_ports::NotFoundError(
- "No more results, token has been invalidated.");
- }
+ // Retrieve docs outside of ResultStateManager critical section.
+ // Will enter ResultState critical section inside ResultRetriever.
+ auto [page_result, has_more_results] =
+ result_retriever.RetrieveNextPage(*result_state, current_time_ms);
- // Copies the SnippetContext in case the ResultState is invalidated.
- SnippetContext snippet_context_copy =
- state_iterator->second.snippet_context();
+ if (!has_more_results) {
+ {
+ // ResultStateManager critical section
+ absl_ports::unique_lock l(&mutex_);
- if (!state_iterator->second.HasMoreResults()) {
- InternalInvalidateResultState(next_page_token);
- }
+ InternalInvalidateResultState(next_page_token);
+ }
- return PageResultState(result_of_page, next_page_token,
- std::move(snippet_context_copy), num_returned);
+ next_page_token = kInvalidNextPageToken;
+ }
+ return std::make_pair(next_page_token, std::move(page_result));
}
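The two entry points above compose into a straightforward caller-side pagination loop. A hedged sketch of the intended usage (assumes a ready ranker, result_spec, document_store, result_retriever and clock; it mirrors the calls made in the tests later in this diff):

// Illustrative pagination over the V2 ResultStateManager API.
ICING_ASSIGN_OR_RETURN(
    auto page_info,  // std::pair<uint64_t, PageResult>
    result_state_manager.CacheAndRetrieveFirstPage(
        std::move(ranker), /*parent_adjustment_info=*/nullptr,
        /*child_adjustment_info=*/nullptr, result_spec, document_store,
        result_retriever, clock.GetSystemTimeMilliseconds()));
while (page_info.first != kInvalidNextPageToken) {
  // ... consume page_info.second.results ...
  ICING_ASSIGN_OR_RETURN(
      page_info,
      result_state_manager.GetNextPage(page_info.first, result_retriever,
                                       clock.GetSystemTimeMilliseconds()));
}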
void ResultStateManager::InvalidateResultState(uint64_t next_page_token) {
@@ -117,10 +156,16 @@ void ResultStateManager::InvalidateResultState(uint64_t next_page_token) {
void ResultStateManager::InvalidateAllResultStates() {
absl_ports::unique_lock l(&mutex_);
+ InternalInvalidateAllResultStates();
+}
+void ResultStateManager::InternalInvalidateAllResultStates() {
+ // We don't have to reset num_total_hits_ (to 0) here, since clearing
+ // result_state_map_ will "eventually" invoke the destructor of ResultState
+ // (which decrements num_total_hits_) and num_total_hits_ will become 0.
result_state_map_.clear();
invalidated_token_set_.clear();
- token_queue_ = {};
+ token_queue_ = std::queue<std::pair<uint64_t, int64_t>>();
}
uint64_t ResultStateManager::GetUniqueToken() {
@@ -136,24 +181,41 @@ uint64_t ResultStateManager::GetUniqueToken() {
return new_token;
}
-void ResultStateManager::RemoveStatesIfNeeded() {
+void ResultStateManager::RemoveStatesIfNeeded(int num_hits_to_add) {
if (result_state_map_.empty() || token_queue_.empty()) {
return;
}
- // Removes any tokens that were previously invalidated.
+ // 1. Check if this new result_state would take up the entire result state
+ // manager budget.
+ if (num_hits_to_add > max_total_hits_) {
+ // This single result state will exceed our budget. Drop everything else to
+ // accommodate it.
+ InternalInvalidateAllResultStates();
+ return;
+ }
+
+ // 2. Remove any tokens that were previously invalidated.
while (!token_queue_.empty() &&
- invalidated_token_set_.find(token_queue_.front()) !=
+ invalidated_token_set_.find(token_queue_.front().first) !=
invalidated_token_set_.end()) {
- invalidated_token_set_.erase(token_queue_.front());
+ invalidated_token_set_.erase(token_queue_.front().first);
token_queue_.pop();
}
- // Removes the oldest state
- if (result_state_map_.size() >= max_result_states_ && !token_queue_.empty()) {
- result_state_map_.erase(token_queue_.front());
+ // 3. If we're over budget, remove states from oldest to newest until we fit
+ // into our budget.
+ // Note: num_total_hits_ may not be decremented immediately after invalidating
+ // a result state, since other threads may still hold the shared pointer.
+ // Thus, we have to check if token_queue_ is empty or not, since it is
+ // possible that num_total_hits_ is non-zero and still greater than
+ // max_total_hits_ when token_queue_ is empty. It will still be decremented
+ // "eventually", once the last thread releases the shared pointer.
+ while (!token_queue_.empty() && num_total_hits_ > max_total_hits_) {
+ InternalInvalidateResultState(token_queue_.front().first);
token_queue_.pop();
}
+ invalidated_token_set_.clear();
}
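Stripped of locking, shared ownership, and TTLs, the eviction policy above is a budget check over a FIFO of tokens. A simplified, self-contained model (all names are hypothetical; in icing the decrement happens in ResultStateV2's destructor rather than inline):

#include <cstdint>
#include <queue>
#include <unordered_map>

void EvictForBudget(int num_hits_to_add, int max_total_hits,
                    int& num_total_hits,
                    std::queue<std::uint64_t>& tokens,
                    std::unordered_map<std::uint64_t, int>& state_sizes) {
  if (num_hits_to_add > max_total_hits) {
    // One oversized incoming state: drop everything to accommodate it.
    tokens = {};
    state_sizes.clear();
    num_total_hits = 0;
    return;
  }
  // Evict oldest first until the current total fits under the budget; the
  // incoming state's hits are accounted for by the caller afterwards.
  while (!tokens.empty() && num_total_hits > max_total_hits) {
    auto it = state_sizes.find(tokens.front());
    if (it != state_sizes.end()) {
      num_total_hits -= it->second;  // icing defers this to a destructor
      state_sizes.erase(it);
    }
    tokens.pop();
  }
}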
void ResultStateManager::InternalInvalidateResultState(uint64_t token) {
@@ -161,10 +223,35 @@ void ResultStateManager::InternalInvalidateResultState(uint64_t token) {
// invalidated_token_set_. The entry in token_queue_ can't be easily removed
// right now (may need O(n) time), so we leave it there and later completely
// remove the token in RemoveStatesIfNeeded().
- if (result_state_map_.erase(token) > 0) {
+ auto itr = result_state_map_.find(token);
+ if (itr != result_state_map_.end()) {
+ // We don't have to decrement num_total_hits_ here, since erasing the shared
+ // ptr instance will "eventually" invoke the destructor of ResultState and
+ // it will handle this.
+ result_state_map_.erase(itr);
invalidated_token_set_.insert(token);
}
}
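The "eventually" wording in these comments leans on shared ownership: the global hit counter drops only when the last std::shared_ptr<ResultStateV2> owner releases the state and its destructor runs. A minimal sketch of that accounting pattern (hypothetical CountedState type, not the real ResultStateV2):

#include <atomic>

// Hypothetical stand-in for ResultStateV2's hit accounting: the shared
// counter is decremented only when the last shared_ptr owner goes away.
class CountedState {
 public:
  explicit CountedState(int num_hits) : num_hits_(num_hits) {}
  ~CountedState() {
    if (counter_ != nullptr) counter_->fetch_sub(num_hits_);
  }
  void RegisterNumTotalHits(std::atomic<int>* counter) {
    counter_ = counter;
    counter_->fetch_add(num_hits_);
  }
 private:
  std::atomic<int>* counter_ = nullptr;
  const int num_hits_;
};

Erasing the map entry therefore removes only one owner; a concurrent GetNextPage still holding the shared_ptr keeps the hits counted until it returns.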
+void ResultStateManager::InternalInvalidateExpiredResultStates(
+ int64_t result_state_ttl, int64_t current_time_ms) {
+ while (!token_queue_.empty() &&
+ current_time_ms - token_queue_.front().second >= result_state_ttl) {
+ auto itr = result_state_map_.find(token_queue_.front().first);
+ if (itr != result_state_map_.end()) {
+ // We don't have to decrement num_total_hits_ here, since erasing the
+ // shared ptr instance will "eventually" invoke the destructor of
+ // ResultState and it will handle this.
+ result_state_map_.erase(itr);
+ } else {
+ // Since result_state_map_ and invalidated_token_set_ are mutually
+ // exclusive, we remove the token from invalidated_token_set_ only if it
+ // isn't present in result_state_map_.
+ invalidated_token_set_.erase(token_queue_.front().first);
+ }
+ token_queue_.pop();
+ }
+}
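As a concrete check of the expiry condition above: with kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000 = 3,600,000 ms, a token pushed at current_time_ms = 1,000 satisfies current_time_ms - 1,000 >= 3,600,000 only at t >= 3,601,000 ms, so it is pruned on the first call at or after that time.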
+
} // namespace lib
} // namespace icing
diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h
index eaf9eb5..a64ae2c 100644
--- a/icing/result/result-state-manager.h
+++ b/icing/result/result-state-manager.h
@@ -15,6 +15,8 @@
#ifndef ICING_RESULT_RESULT_STATE_MANAGER_H_
#define ICING_RESULT_RESULT_STATE_MANAGER_H_
+#include <atomic>
+#include <memory>
#include <queue>
#include <random>
#include <unordered_map>
@@ -22,10 +24,13 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/mutex.h"
-#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
-#include "icing/result/page-result-state.h"
-#include "icing/result/result-state.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-v2.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -34,38 +39,62 @@ namespace lib {
// SearchResultProto.next_page_token.
inline constexpr uint64_t kInvalidNextPageToken = 0;
+// 1 hr as the default ttl for a ResultState after being pushed into
+// token_queue_.
+inline constexpr int64_t kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000;
+
// Used to store and manage ResultState.
class ResultStateManager {
public:
- explicit ResultStateManager(int max_hits_per_query, int max_result_states);
+ explicit ResultStateManager(int max_total_hits,
+ const DocumentStore& document_store);
ResultStateManager(const ResultStateManager&) = delete;
ResultStateManager& operator=(const ResultStateManager&) = delete;
- // Ranks the results and returns the first page of them. The result object
- // PageResultState contains a next_page_token which can be used to fetch more
- // pages later. It will be set to a default value 0 if there're no more pages.
+ // Creates a new result state, retrieves and returns PageResult for the first
+ // page. Also caches the new result state and returns a next_page_token which
+ // can be used to fetch more pages from the same result state later. Before
+ // caching the result state, adjusts (truncates) the size and evicts some old
+ // result states if the cache size limit is exceeded. next_page_token will be
+ // set to the default value kInvalidNextPageToken if there are no more pages.
+ //
+ // NOTE: parent_adjustment_info and child_adjustment_info can be nullptr if
+ // there is no requirement to apply adjustment (snippet, projection) to
+ // them.
//
- // NOTE: it's caller's responsibility not to call this method with the same
- // ResultState more than once, otherwise duplicate states will be stored
- // internally.
+ // NOTE: it is possible to have an empty result for the first page even if the
+ // ranker was not empty before the retrieval, since GroupResultLimiter
+ // may filter out all docs. In this case, the first page is also the
+ // last page and next_page_token will be set to kInvalidNextPageToken.
//
// Returns:
- // A PageResultState on success
- // INVALID_ARGUMENT if the input state contains no results
- libtextclassifier3::StatusOr<PageResultState> RankAndPaginate(
- ResultState result_state) ICING_LOCKS_EXCLUDED(mutex_);
+ // A token and PageResult wrapped by std::pair on success
+ // INVALID_ARGUMENT if the input ranker is null or contains no results
+ libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
+ CacheAndRetrieveFirstPage(
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store,
+ const ResultRetrieverV2& result_retriever, int64_t current_time_ms)
+ ICING_LOCKS_EXCLUDED(mutex_);
- // Retrieves and returns the next page of results wrapped in PageResultState.
+ // Retrieves and returns PageResult for the next page.
// The returned results won't exist in ResultStateManager anymore. If the
// query has no more pages after this retrieval, the input token will be
// invalidated.
//
+ // NOTE: it is possible to have an empty result for the last page even if the
+ //       ranker was not empty before the retrieval, since GroupResultLimiter
+ //       may filter out all remaining docs.
+ //
// Returns:
- // PageResultState on success, guaranteed to have non-empty results
+ // A token and PageResult wrapped by std::pair on success
// NOT_FOUND if failed to find any more results
- libtextclassifier3::StatusOr<PageResultState> GetNextPage(
- uint64_t next_page_token) ICING_LOCKS_EXCLUDED(mutex_);
+ libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> GetNextPage(
+ uint64_t next_page_token, const ResultRetrieverV2& result_retriever,
+ int64_t current_time_ms) ICING_LOCKS_EXCLUDED(mutex_);
// Invalidates the result state associated with the given next-page token.
void InvalidateResultState(uint64_t next_page_token)
@@ -74,23 +103,30 @@ class ResultStateManager {
// Invalidates all result states / tokens currently in ResultStateManager.
void InvalidateAllResultStates() ICING_LOCKS_EXCLUDED(mutex_);
+ int num_total_hits() const { return num_total_hits_; }
+
private:
absl_ports::shared_mutex mutex_;
- // The maximum number of scored document hits to return for a query. When we
- // have more than the maximum number, extra hits will be truncated.
- const int max_hits_per_query_;
+ const DocumentStore& document_store_;
+
+ // The maximum number of scored document hits that all result states may
+ // have. When a new result state is added such that num_total_hits_ would
+ // exceed max_total_hits_, the oldest result states are evicted until
+ // num_total_hits_ is below max_total_hits_.
+ const int max_total_hits_;
- // The maximum number of result states. When we have more than the maximum
- // number, the oldest / firstly added result state will be removed.
- const int max_result_states_;
+ // The total number of scored document hits across all result states
+ // currently held by the result state manager.
+ std::atomic<int> num_total_hits_;
// A hash map of (next-page token -> result state)
- std::unordered_map<uint64_t, ResultState> result_state_map_
+ std::unordered_map<uint64_t, std::shared_ptr<ResultStateV2>> result_state_map_
ICING_GUARDED_BY(mutex_);
- // A queue used to track the insertion order of tokens
- std::queue<uint64_t> token_queue_ ICING_GUARDED_BY(mutex_);
+ // A queue used to track the insertion order of tokens with pushed timestamps.
+ std::queue<std::pair<uint64_t, int64_t>> token_queue_
+ ICING_GUARDED_BY(mutex_);
// A set to temporarily store the invalidated tokens before they're finally
// removed from token_queue_. We store the invalidated tokens to ensure the
@@ -105,20 +141,37 @@ class ResultStateManager {
// currently valid tokens. When the maximum number of result states is
// reached, the oldest / firstly added result state will be removed to make
// room for the new state.
- uint64_t Add(ResultState result_state) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ uint64_t Add(std::shared_ptr<ResultStateV2> result_state,
+ int64_t current_time_ms) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to generate a next-page token that is unique among all
// existing tokens in token_queue_.
uint64_t GetUniqueToken() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Helper method to remove old states to make room for incoming states.
- void RemoveStatesIfNeeded() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Helper method to remove old states to make room for an incoming state
+ // with num_hits_to_add hits.
+ void RemoveStatesIfNeeded(int num_hits_to_add)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to remove a result state from result_state_map_, the token
// will then be temporarily kept in invalidated_token_set_ until it's finally
// removed from token_queue_.
void InternalInvalidateResultState(uint64_t token)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Internal method to invalidate all result states / tokens currently in
+ // ResultStateManager. We need this separate method so that other public
+ // methods don't need to call InvalidateAllResultStates(). Public methods
+ // calling each other may cause deadlock issues.
+ void InternalInvalidateAllResultStates()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
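The deadlock the comment above warns about is the standard non-reentrant-mutex self-call; a minimal hypothetical illustration with std::mutex (icing's absl_ports mutex behaves analogously):

#include <mutex>

// Hypothetical: why public methods here avoid calling each other. With a
// non-reentrant mutex, the inner call blocks on the lock the outer call
// already holds.
class Manager {
 public:
  void PublicA() {
    std::lock_guard<std::mutex> l(mu_);
    PublicB();  // deadlocks: mu_ is already held by this thread
  }
  void PublicB() { std::lock_guard<std::mutex> l(mu_); }
 private:
  std::mutex mu_;
};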
+
+ // Internal method to invalidate and remove expired result states / tokens
+ // currently in ResultStateManager that were created before
+ // current_time - result_state_ttl.
+ void InternalInvalidateExpiredResultStates(int64_t result_state_ttl,
+ int64_t current_time_ms)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
} // namespace lib
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index 6defa6f..75d1d93 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -14,22 +14,39 @@
#include "icing/result/result-state-manager.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
#include "icing/portable/equals-proto.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
+
using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::ElementsAre;
using ::testing::Eq;
-using ::testing::Gt;
using ::testing::IsEmpty;
-
-ScoredDocumentHit CreateScoredDocumentHit(DocumentId document_id) {
- return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
-}
+using ::testing::Not;
+using ::testing::SizeIs;
+using PageResultInfo = std::pair<uint64_t, PageResult>;
ScoringSpecProto CreateScoringSpec() {
ScoringSpecProto scoring_spec;
@@ -37,441 +54,1521 @@ ScoringSpecProto CreateScoringSpec() {
return scoring_spec;
}
-ResultSpecProto CreateResultSpec(int num_per_page) {
+ResultSpecProto CreateResultSpec(
+ int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
ResultSpecProto result_spec;
+ result_spec.set_result_group_type(result_group_type);
result_spec.set_num_per_page(num_per_page);
return result_spec;
}
-ResultState CreateResultState(
- const std::vector<ScoredDocumentHit>& scored_document_hits,
- int num_per_page) {
- return ResultState(scored_document_hits, /*query_terms=*/{},
- SearchSpecProto::default_instance(), CreateScoringSpec(),
- CreateResultSpec(num_per_page));
+DocumentProto CreateDocument(int id) {
+ return DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri(std::to_string(id))
+ .SetSchema("Document")
+ .SetCreationTimestampMs(1574365086666 + id)
+ .SetScore(1)
+ .Build();
}
-TEST(ResultStateManagerTest, ShouldRankAndPaginateOnePage) {
- ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3)},
- /*num_per_page=*/10);
+class ResultStateManagerTest : public testing::Test {
+ protected:
+ ResultStateManagerTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ clock_ = std::make_unique<FakeClock>();
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, clock_.get()));
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, clock_.get(), schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ result_retriever_, ResultRetrieverV2::Create(
+ document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ clock_.reset();
+ }
+
+ std::pair<ScoredDocumentHit, DocumentProto> AddScoredDocument(
+ DocumentId document_id) {
+ DocumentProto document;
+ document.set_namespace_("namespace");
+ document.set_uri(std::to_string(document_id));
+ document.set_schema("Document");
+ document.set_creation_timestamp_ms(1574365086666 + document_id);
+ document_store_->Put(document);
+ return std::make_pair(
+ ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1),
+ std::move(document));
+ }
+
+ std::pair<std::vector<ScoredDocumentHit>, std::vector<DocumentProto>>
+ AddScoredDocuments(const std::vector<DocumentId>& document_ids) {
+ std::vector<ScoredDocumentHit> scored_document_hits;
+ std::vector<DocumentProto> document_protos;
+
+ for (DocumentId document_id : document_ids) {
+ std::pair<ScoredDocumentHit, DocumentProto> pair =
+ AddScoredDocument(document_id);
+ scored_document_hits.emplace_back(std::move(pair.first));
+ document_protos.emplace_back(std::move(pair.second));
+ }
+
+ std::reverse(document_protos.begin(), document_protos.end());
+
+ return std::make_pair(std::move(scored_document_hits),
+ std::move(document_protos));
+ }
+
+ FakeClock* clock() { return clock_.get(); }
+ const FakeClock* clock() const { return clock_.get(); }
+
+ DocumentStore& document_store() { return *document_store_; }
+ const DocumentStore& document_store() const { return *document_store_; }
+
+ SchemaStore& schema_store() { return *schema_store_; }
+ const SchemaStore& schema_store() const { return *schema_store_; }
+
+ const ResultRetrieverV2& result_retriever() const {
+ return *result_retriever_;
+ }
+
+ private:
+ Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<FakeClock> clock_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<ResultRetrieverV2> result_retriever_;
+};
+
+TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageOnePage) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store().Put(CreateDocument(/*id=*/3)));
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskNone, /*score=*/1},
+ {document_id2, kSectionIdMaskNone, /*score=*/1},
+ {document_id3, kSectionIdMaskNone, /*score=*/1}};
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker = std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
- ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state,
- result_state_manager.RankAndPaginate(std::move(original_result_state)));
-
- EXPECT_THAT(page_result_state.next_page_token, Eq(kInvalidNextPageToken));
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
- // Should get the original scored document hits
- EXPECT_THAT(
- page_result_state.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1))));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::move(ranker), /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/10, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
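+  // PageResultInfo pairs the next-page token (first) with the retrieved page
+  // (second); kInvalidNextPageToken indicates there are no more pages.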
+ EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken));
+
+  // Should get all three documents on the single page.
+ ASSERT_THAT(page_result_info.second.results, SizeIs(3));
+ EXPECT_THAT(page_result_info.second.results.at(0).document(),
+ EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(page_result_info.second.results.at(1).document(),
+ EqualsProto(CreateDocument(/*id=*/2)));
+ EXPECT_THAT(page_result_info.second.results.at(2).document(),
+ EqualsProto(CreateDocument(/*id=*/1)));
}
-TEST(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) {
- ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
- /*num_per_page=*/2);
+TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageMultiplePages) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store().Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store().Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store().Put(CreateDocument(/*id=*/5)));
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskNone, /*score=*/1},
+ {document_id2, kSectionIdMaskNone, /*score=*/1},
+ {document_id3, kSectionIdMaskNone, /*score=*/1},
+ {document_id4, kSectionIdMaskNone, /*score=*/1},
+ {document_id5, kSectionIdMaskNone, /*score=*/1}};
+ std::unique_ptr<ScoredDocumentHitsRanker> ranker = std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(original_result_state)));
- EXPECT_THAT(
- page_result_state1.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
-
- uint64_t next_page_token = page_result_state1.next_page_token;
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::move(ranker), /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(2));
+ EXPECT_THAT(page_result_info1.second.results.at(0).document(),
+ EqualsProto(CreateDocument(/*id=*/5)));
+ EXPECT_THAT(page_result_info1.second.results.at(1).document(),
+ EqualsProto(CreateDocument(/*id=*/4)));
+
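+  // The same token is used to fetch every remaining page of this state.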
+ uint64_t next_page_token = page_result_info1.first;
// Second page, 2 results
- ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state2,
- result_state_manager.GetNextPage(next_page_token));
- EXPECT_THAT(
- page_result_state2.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2))));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info2,
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info2.first, Eq(next_page_token));
+ ASSERT_THAT(page_result_info2.second.results, SizeIs(2));
+ EXPECT_THAT(page_result_info2.second.results.at(0).document(),
+ EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(page_result_info2.second.results.at(1).document(),
+ EqualsProto(CreateDocument(/*id=*/2)));
// Third page, 1 result
- ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3,
- result_state_manager.GetNextPage(next_page_token));
- EXPECT_THAT(page_result_state3.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/1))));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info3.first, Eq(kInvalidNextPageToken));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(CreateDocument(/*id=*/1)));
// No results
- EXPECT_THAT(result_state_manager.GetNextPage(next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, EmptyStateShouldReturnError) {
- ResultState empty_result_state = CreateResultState({}, /*num_per_page=*/1);
-
+TEST_F(ResultStateManagerTest, NullRankerShouldReturnError) {
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+
EXPECT_THAT(
- result_state_manager.RankAndPaginate(std::move(empty_result_state)),
+ result_state_manager.CacheAndRetrieveFirstPage(
+ /*ranker=*/nullptr, /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(ResultStateManagerTest, ShouldInvalidateOneToken) {
- ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3)},
- /*num_per_page=*/1);
- ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
- /*num_per_page=*/1);
+TEST_F(ResultStateManagerTest, EmptyRankerShouldReturnEmptyFirstPage) {
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::vector<ScoredDocumentHit>(), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(page_result_info.second.results, IsEmpty());
+}
+
+TEST_F(ResultStateManagerTest, ShouldAllowEmptyFirstPage) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(CreateDocument(/*id=*/2)));
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskNone, /*score=*/1},
+ {document_id2, kSectionIdMaskNone, /*score=*/1}};
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+
+ // Create a ResultSpec that limits "namespace" to 0 results.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(0);
+ entry->set_namespace_("namespace");
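+  // All of the test documents live in "namespace", so this grouping filters
+  // out every hit.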
+
+ // First page, no result.
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(result_state1)));
+ PageResultInfo page_result_info,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ // If the first page has no result, then it should be the last page.
+ EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(page_result_info.second.results, IsEmpty());
+}
+
+TEST_F(ResultStateManagerTest, ShouldAllowEmptyLastPage) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store().Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store().Put(CreateDocument(/*id=*/4)));
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskNone, /*score=*/1},
+ {document_id2, kSectionIdMaskNone, /*score=*/1},
+ {document_id3, kSectionIdMaskNone, /*score=*/1},
+ {document_id4, kSectionIdMaskNone, /*score=*/1}};
+
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+
+ // Create a ResultSpec that limits "namespace" to 2 results.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(2);
+ entry->set_namespace_("namespace");
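+  // All four hits live in "namespace", so at most two results total survive
+  // the group result limiter.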
+
+ // First page, 2 results.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(2));
+ EXPECT_THAT(page_result_info1.second.results.at(0).document(),
+ EqualsProto(CreateDocument(/*id=*/4)));
+ EXPECT_THAT(page_result_info1.second.results.at(1).document(),
+ EqualsProto(CreateDocument(/*id=*/3)));
+
+ uint64_t next_page_token = page_result_info1.first;
+
+  // Second page: all remaining documents will be filtered out by the group
+  // result limiter, so we should get an empty page.
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state2,
- result_state_manager.RankAndPaginate(std::move(result_state2)));
+ PageResultInfo page_result_info2,
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info2.first, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(page_result_info2.second.results, IsEmpty());
+}
- result_state_manager.InvalidateResultState(
- page_result_state1.next_page_token);
+TEST_F(ResultStateManagerTest,
+ ShouldInvalidateExpiredTokensWhenCacheAndRetrieveFirstPage) {
+ auto [scored_document_hits1, document_protos1] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2});
+ auto [scored_document_hits2, document_protos2] = AddScoredDocuments(
+ {/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
- // page_result_state1.next_page_token() shouldn't be found
- EXPECT_THAT(
- result_state_manager.GetNextPage(page_result_state1.next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
- // page_result_state2.next_page_token() should still exist
+ SectionRestrictQueryTermsMap query_terms;
+ SearchSpecProto search_spec;
+ ScoringSpecProto scoring_spec = CreateScoringSpec();
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE);
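+  // search_spec, scoring_spec and query_terms only feed the
+  // ResultAdjustmentInfo attached to each cached state below.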
+
+  // Set time to 1s and add state 1.
+ clock()->SetSystemTimeMilliseconds(1000);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec,
+ result_spec, &schema_store(),
+ query_terms),
+ /*child_adjustment_info=*/nullptr, result_spec, document_store(),
+ result_retriever(), clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
+
+  // Set time to 1hr1s (state 1's TTL has elapsed) and add state 2.
+ clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 1000);
ICING_ASSERT_OK_AND_ASSIGN(
- page_result_state2,
- result_state_manager.GetNextPage(page_result_state2.next_page_token));
- EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/5))));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec,
+ result_spec, &schema_store(),
+ query_terms),
+ /*child_adjustment_info=*/nullptr, result_spec, document_store(),
+ result_retriever(), clock()->GetSystemTimeMilliseconds()));
+
+ // Calling CacheAndRetrieveFirstPage() on state 2 should invalidate the
+ // expired state 1 internally.
+ //
+ // We test the behavior by setting time back to 1s, to make sure the
+ // invalidation of state 1 was done by the previous
+ // CacheAndRetrieveFirstPage() instead of the following GetNextPage().
+ clock()->SetSystemTimeMilliseconds(1000);
+ // page_result_info1's token (page_result_info1.first) shouldn't be found.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, ShouldInvalidateAllTokens) {
- ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3)},
- /*num_per_page=*/1);
- ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
- /*num_per_page=*/1);
+TEST_F(ResultStateManagerTest,
+ ShouldInvalidateExpiredTokensWhenGetNextPageOnOthers) {
+ auto [scored_document_hits1, document_protos1] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2});
+ auto [scored_document_hits2, document_protos2] = AddScoredDocuments(
+ {/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+
+  // Set time to 1s and add state 1.
+ clock()->SetSystemTimeMilliseconds(1000);
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(result_state1)));
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
+
+  // Set time to 2s and add state 2.
+ clock()->SetSystemTimeMilliseconds(2000);
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state2,
- result_state_manager.RankAndPaginate(std::move(result_state2)));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info2.first, Not(Eq(kInvalidNextPageToken)));
+
+  // 1. Set time to 1hr1s.
+ // 2. Call GetNextPage() on state 2. It should correctly invalidate the
+ // expired state 1.
+ // 3. Then calling GetNextPage() on state 1 shouldn't get anything.
+ clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 1000);
+  // page_result_info2's token (page_result_info2.first) should be found.
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
+ result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ // We test the behavior by setting time back to 2s, to make sure the
+ // invalidation of state 1 was done by the previous GetNextPage() instead of
+ // the following GetNextPage().
+ clock()->SetSystemTimeMilliseconds(2000);
+ // page_result_info1's token (page_result_info1.first) shouldn't be found.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
- result_state_manager.InvalidateAllResultStates();
+TEST_F(ResultStateManagerTest,
+ ShouldInvalidateExpiredTokensWhenGetNextPageOnItself) {
+ auto [scored_document_hits1, document_protos1] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2});
+ auto [scored_document_hits2, document_protos2] = AddScoredDocuments(
+ {/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
- // page_result_state1.next_page_token() shouldn't be found
- EXPECT_THAT(
- result_state_manager.GetNextPage(page_result_state1.next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
- // page_result_state2.next_page_token() shouldn't be found
- EXPECT_THAT(
- result_state_manager.GetNextPage(page_result_state2.next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  // Set time to 1s and add the state.
+ clock()->SetSystemTimeMilliseconds(1000);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info.first, Not(Eq(kInvalidNextPageToken)));
+
+  // 1. Set time to 1hr1s.
+ // 2. Then calling GetNextPage() on the state shouldn't get anything.
+ clock()->SetSystemTimeMilliseconds(kDefaultResultStateTtlInMs + 1000);
+ // page_result_info's token (page_result_info.first) shouldn't be found.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, ShouldRemoveOldestResultState) {
- ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
- /*num_per_page=*/1);
- ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4)},
- /*num_per_page=*/1);
- ResultState result_state3 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
- /*num_per_page=*/1);
+TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) {
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(CreateDocument(/*id=*/1)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(CreateDocument(/*id=*/2)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store().Put(CreateDocument(/*id=*/3)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store().Put(CreateDocument(/*id=*/4)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store().Put(CreateDocument(/*id=*/5)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ document_store().Put(CreateDocument(/*id=*/6)));
+ std::vector<ScoredDocumentHit> scored_document_hits1 = {
+ {document_id1, kSectionIdMaskNone, /*score=*/1},
+ {document_id2, kSectionIdMaskNone, /*score=*/1},
+ {document_id3, kSectionIdMaskNone, /*score=*/1}};
+ std::vector<ScoredDocumentHit> scored_document_hits2 = {
+ {document_id4, kSectionIdMaskNone, /*score=*/1},
+ {document_id5, kSectionIdMaskNone, /*score=*/1},
+ {document_id6, kSectionIdMaskNone, /*score=*/1}};
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/2);
- ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(result_state1)));
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
+
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state2,
- result_state_manager.RankAndPaginate(std::move(result_state2)));
- // Adding state 3 should cause state 1 to be removed.
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state3,
- result_state_manager.RankAndPaginate(std::move(result_state3)));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+  // Invalidate the first result state by its token.
+ result_state_manager.InvalidateResultState(page_result_info1.first);
+
+  // page_result_info1's token (page_result_info1.first) shouldn't be found.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(
- result_state_manager.GetNextPage(page_result_state1.next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  // page_result_info2's token (page_result_info2.first) should still exist.
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
+ result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+  // Should get the next page of state 2 (its second-ranked document).
+ ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info2.second.results.at(0).document(),
+ EqualsProto(CreateDocument(/*id=*/5)));
+}
+
+TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
+ auto [scored_document_hits1, document_protos1] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2});
+ auto [scored_document_hits2, document_protos2] = AddScoredDocuments(
+ {/*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
+
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
- page_result_state2,
- result_state_manager.GetNextPage(page_result_state2.next_page_token));
- EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/3))));
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
- page_result_state3,
- result_state_manager.GetNextPage(page_result_state3.next_page_token));
- EXPECT_THAT(page_result_state3.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/5))));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ result_state_manager.InvalidateAllResultStates();
+
+  // page_result_info1's token (page_result_info1.first) shouldn't be found.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // page_result_info2's token (page_result_info2.first) shouldn't be found.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest,
- PreviouslyInvalidatedResultStateShouldNotBeCounted) {
- ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
- /*num_per_page=*/1);
- ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4)},
- /*num_per_page=*/1);
- ResultState result_state3 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
- /*num_per_page=*/1);
- ResultState result_state4 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
- CreateScoredDocumentHit(/*document_id=*/8)},
- /*num_per_page=*/1);
+TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
+ auto [scored_document_hits1, document_protos1] =
+ AddScoredDocuments({/*document_id=*/0, /*document_id=*/1});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/2, /*document_id=*/3});
+ auto [scored_document_hits3, document_protos3] =
+ AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
+
+ ResultStateManager result_state_manager(/*max_total_hits=*/2,
+ document_store());
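+
+  // Each result state caches one hit after its first page is returned, so a
+  // max_total_hits of 2 can hold at most two states at a time.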
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(result_state1)));
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state2,
- result_state_manager.RankAndPaginate(std::move(result_state2)));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // Adding state 3 should cause state 1 to be removed.
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state3,
- result_state_manager.RankAndPaginate(std::move(result_state3)));
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
+ result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info2.second.results.at(0).document(),
+ EqualsProto(document_protos2.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
+ result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(1)));
+}
- // Invalidates state 2, so that the number of valid tokens becomes 2.
- result_state_manager.InvalidateResultState(
- page_result_state2.next_page_token);
+TEST_F(ResultStateManagerTest,
+ InvalidatedResultStateShouldDecreaseCurrentHitsCount) {
+ auto [scored_document_hits1, document_protos1] =
+ AddScoredDocuments({/*document_id=*/0, /*document_id=*/1});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/2, /*document_id=*/3});
+ auto [scored_document_hits3, document_protos3] =
+ AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from
+ // CacheAndRetrieveFirstPage). Each result state has a page size of 1 and a
+  // result set of 2 hits, so each result state will take up one hit of our
+  // three-hit budget.
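+  // (Three states x one cached hit each exactly fills the three-hit budget.)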
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
- // Adding state 4 shouldn't affect rest of the states
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state4,
- result_state_manager.RankAndPaginate(std::move(result_state4)));
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
- page_result_state1,
- result_state_manager.GetNextPage(page_result_state1.next_page_token));
- EXPECT_THAT(page_result_state1.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/1))));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(
- result_state_manager.GetNextPage(page_result_state2.next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+  // Invalidates state 2, so that the number of hits currently cached is
+  // decremented to 2.
+ result_state_manager.InvalidateResultState(page_result_info2.first);
+
+ // If invalidating state 2 correctly decremented the current hit count to 2,
+ // then adding state 4 should still be within our budget and no other result
+ // states should be evicted.
+ auto [scored_document_hits4, document_protos4] =
+ AddScoredDocuments({/*document_id=*/6, /*document_id=*/7});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info4,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits4), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info1,
+ result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info1.second.results.at(0).document(),
+ EqualsProto(document_protos1.at(1)));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
+ result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
+ result_state_manager.GetNextPage(
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info4.second.results.at(0).document(),
+ EqualsProto(document_protos4.at(1)));
+}
+
+TEST_F(ResultStateManagerTest,
+ InvalidatedAllResultStatesShouldResetCurrentHitCount) {
+ auto [scored_document_hits1, document_protos1] =
+ AddScoredDocuments({/*document_id=*/0, /*document_id=*/1});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/2, /*document_id=*/3});
+ auto [scored_document_hits3, document_protos3] =
+ AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from
+ // CacheAndRetrieveFirstPage). Each result state has a page size of 1 and a
+  // result set of 2 hits, so each result state will take up one hit of our
+  // three-hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
- page_result_state3,
- result_state_manager.GetNextPage(page_result_state3.next_page_token));
- EXPECT_THAT(page_result_state3.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/5))));
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(
- page_result_state4,
- result_state_manager.GetNextPage(page_result_state4.next_page_token));
- EXPECT_THAT(page_result_state4.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/7))));
-}
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
-TEST(ResultStateManagerTest, ShouldGetSnippetContext) {
- ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
- result_spec.mutable_snippet_spec()->set_max_window_bytes(5);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // Invalidates all states so that the current hit count will be 0.
+ result_state_manager.InvalidateAllResultStates();
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ // If invalidating all states correctly reset the current hit count to 0,
+  // then adding states 4, 5, and 6 should still be within our budget and no
+  // other result states should be evicted.
+ auto [scored_document_hits4, document_protos4] =
+ AddScoredDocuments({/*document_id=*/6, /*document_id=*/7});
+ auto [scored_document_hits5, document_protos5] =
+ AddScoredDocuments({/*document_id=*/8, /*document_id=*/9});
+ auto [scored_document_hits6, document_protos6] =
+ AddScoredDocuments({/*document_id=*/10, /*document_id=*/11});
- SectionRestrictQueryTermsMap query_terms_map;
- query_terms_map.emplace("term1", std::unordered_set<std::string>());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info4,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits4), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- ResultState original_result_state = ResultState(
- /*scored_document_hits=*/{CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
- query_terms_map, search_spec, CreateScoringSpec(), result_spec);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info5,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits5), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state,
- result_state_manager.RankAndPaginate(std::move(original_result_state)));
+ PageResultInfo page_result_info6,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits6), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- ASSERT_THAT(page_result_state.next_page_token, Gt(kInvalidNextPageToken));
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(page_result_state.snippet_context.match_type,
- Eq(TermMatchType::EXACT_ONLY));
- EXPECT_TRUE(page_result_state.snippet_context.query_terms.find("term1") !=
- page_result_state.snippet_context.query_terms.end());
- EXPECT_THAT(page_result_state.snippet_context.snippet_spec,
- EqualsProto(result_spec.snippet_spec()));
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
+ result_state_manager.GetNextPage(
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info4.second.results.at(0).document(),
+ EqualsProto(document_protos4.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info5,
+ result_state_manager.GetNextPage(
+ page_result_info5.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info5.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info5.second.results.at(0).document(),
+ EqualsProto(document_protos5.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info6,
+ result_state_manager.GetNextPage(
+ page_result_info6.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info6.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info6.second.results.at(0).document(),
+ EqualsProto(document_protos6.at(1)));
}
-TEST(ResultStateManagerTest, ShouldGetDefaultSnippetContext) {
- ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
- // 0 indicates no snippeting
- result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(0);
- result_spec.mutable_snippet_spec()->set_max_window_bytes(0);
+TEST_F(
+ ResultStateManagerTest,
+ InvalidatedResultStateShouldDecreaseCurrentHitsCountByExactStateHitCount) {
+ auto [scored_document_hits1, document_protos1] =
+ AddScoredDocuments({/*document_id=*/0, /*document_id=*/1});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/2, /*document_id=*/3});
+ auto [scored_document_hits3, document_protos3] =
+ AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from
+ // CacheAndRetrieveFirstPage). Each result state has a page size of 1 and a
+  // result set of 2 hits, so each result state will take up one hit of our
+  // three-hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- SectionRestrictQueryTermsMap query_terms_map;
- query_terms_map.emplace("term1", std::unordered_set<std::string>());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- ResultState original_result_state = ResultState(
- /*scored_document_hits=*/{CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
- query_terms_map, search_spec, CreateScoringSpec(), result_spec);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+  // Invalidates state 2, so that the number of hits currently cached is
+  // decremented to 2.
+ result_state_manager.InvalidateResultState(page_result_info2.first);
+
+ // If invalidating state 2 correctly decremented the current hit count to 2,
+ // then adding state 4 should still be within our budget and no other result
+ // states should be evicted.
+ auto [scored_document_hits4, document_protos4] =
+ AddScoredDocuments({/*document_id=*/6, /*document_id=*/7});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info4,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits4), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // If invalidating result state 2 correctly decremented the current hit count
+ // to 2 and adding state 4 correctly incremented it to 3, then adding this
+ // result state should trigger the eviction of state 1.
+ auto [scored_document_hits5, document_protos5] =
+ AddScoredDocuments({/*document_id=*/8, /*document_id=*/9});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info5,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits5), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
+ result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
+ result_state_manager.GetNextPage(
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info4.second.results.at(0).document(),
+ EqualsProto(document_protos4.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info5,
+ result_state_manager.GetNextPage(
+ page_result_info5.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info5.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info5.second.results.at(0).document(),
+ EqualsProto(document_protos5.at(1)));
+}
+
+TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
+ auto [scored_document_hits1, document_protos1] =
+ AddScoredDocuments({/*document_id=*/0, /*document_id=*/1});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/2, /*document_id=*/3});
+ auto [scored_document_hits3, document_protos3] =
+ AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from
+ // CacheAndRetrieveFirstPage). Each result state has a page size of 1 and a
+  // result set of 2 hits, so each result state will take up one hit of our
+  // three-hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state,
- result_state_manager.RankAndPaginate(std::move(original_result_state)));
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- ASSERT_THAT(page_result_state.next_page_token, Gt(kInvalidNextPageToken));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- EXPECT_THAT(page_result_state.snippet_context.query_terms, IsEmpty());
- EXPECT_THAT(
- page_result_state.snippet_context.snippet_spec,
- EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance()));
- EXPECT_THAT(page_result_state.snippet_context.match_type,
- Eq(TermMatchType::UNKNOWN));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // GetNextPage for result state 1 should return its result and decrement the
+ // number of cached hits to 2.
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info1,
+ result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info1.second.results.at(0).document(),
+ EqualsProto(document_protos1.at(1)));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2, then adding state 4 should still be within our
+ // budget and no other result states should be evicted.
+ auto [scored_document_hits4, document_protos4] =
+ AddScoredDocuments({/*document_id=*/6, /*document_id=*/7});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info4,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits4), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
+ result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info2.second.results.at(0).document(),
+ EqualsProto(document_protos2.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
+ result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
+ result_state_manager.GetNextPage(
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info4.second.results.at(0).document(),
+ EqualsProto(document_protos4.at(1)));
}
-TEST(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) {
- ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
- /*num_per_page=*/2);
+TEST_F(ResultStateManagerTest,
+ GetNextPageShouldDecreaseCurrentHitsCountByExactlyOnePage) {
+ auto [scored_document_hits1, document_protos1] =
+ AddScoredDocuments({/*document_id=*/0, /*document_id=*/1});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/2, /*document_id=*/3});
+ auto [scored_document_hits3, document_protos3] =
+ AddScoredDocuments({/*document_id=*/4, /*document_id=*/5});
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from
+ // CacheAndRetrieveFirstPage). Each result state has a page size of 1 and a
+  // result set of 2 hits, so each state takes up one hit of our three-hit
+  // budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- // First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(original_result_state)));
- ASSERT_THAT(page_result_state1.scored_document_hits.size(), Eq(2));
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- // No previously returned results
- EXPECT_THAT(page_result_state1.num_previously_returned, Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // GetNextPage for result state 1 should return its result and decrement the
+ // number of cached hits to 2.
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info1,
+ result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info1.second.results.at(0).document(),
+ EqualsProto(document_protos1.at(1)));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2, then adding state 4 should still be within our
+ // budget and no other result states should be evicted.
+ auto [scored_document_hits4, document_protos4] =
+ AddScoredDocuments({/*document_id=*/6, /*document_id=*/7});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info4,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits4), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2 and adding state 4 correctly incremented it to 3,
+ // then adding this result state should trigger the eviction of state 2.
+ auto [scored_document_hits5, document_protos5] =
+ AddScoredDocuments({/*document_id=*/8, /*document_id=*/9});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info5,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits5), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
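+  // Result state 1 was already exhausted, and result state 2 should have been
+  // evicted to make room for state 5, so both now return NOT_FOUND.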
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- uint64_t next_page_token = page_result_state1.next_page_token;
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- // Second page, 2 results
- ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state2,
- result_state_manager.GetNextPage(next_page_token));
- ASSERT_THAT(page_result_state2.scored_document_hits.size(), Eq(2));
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info3,
+ result_state_manager.GetNextPage(
+ page_result_info3.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info4,
+ result_state_manager.GetNextPage(
+ page_result_info4.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info4.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info4.second.results.at(0).document(),
+ EqualsProto(document_protos4.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info5,
+ result_state_manager.GetNextPage(
+ page_result_info5.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info5.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info5.second.results.at(0).document(),
+ EqualsProto(document_protos5.at(1)));
+}
- // num_previously_returned = size of first page
- EXPECT_THAT(page_result_state2.num_previously_returned, Eq(2));
+TEST_F(ResultStateManagerTest,
+ AddingOverBudgetResultStateShouldEvictAllStates) {
+ auto [scored_document_hits1, document_protos1] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2});
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/3, /*document_id=*/4});
- // Third page, 1 result
- ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3,
- result_state_manager.GetNextPage(next_page_token));
- ASSERT_THAT(page_result_state3.scored_document_hits.size(), Eq(1));
+ // Add the first two states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from
+  // CacheAndRetrieveFirstPage). Each result state has a page size of 1, so 3
+  // hits (2 from state 1 and 1 from state 2) will remain cached.
+ ResultStateManager result_state_manager(/*max_total_hits=*/4,
+ document_store());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
- // num_previously_returned = size of first and second pages
- EXPECT_THAT(page_result_state3.num_previously_returned, Eq(4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // Add a result state that is larger than the entire budget. This should
+ // result in all previous result states being evicted, the first hit from
+  // result state 3 being returned, and the next four hits being cached (the
+  // last hit is dropped because it exceeds the budget).
+ auto [scored_document_hits3, document_protos3] = AddScoredDocuments(
+ {/*document_id=*/5, /*document_id=*/6, /*document_id=*/7,
+ /*document_id=*/8, /*document_id=*/9, /*document_id=*/10});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info3,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits3), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info3.first, Not(Eq(kInvalidNextPageToken)));
+
+ // GetNextPage for result state 1 and 2 should return NOT_FOUND.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- // No more results
- EXPECT_THAT(result_state_manager.GetNextPage(next_page_token),
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Only the next four results in state 3 should be retrievable.
+ uint64_t next_page_token3 = page_result_info3.first;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_info3,
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info3.first, Eq(next_page_token3));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(1)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_info3,
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info3.first, Eq(next_page_token3));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(2)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_info3,
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info3.first, Eq(next_page_token3));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(3)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_info3,
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+  // The final document should have been dropped because it exceeded the
+  // budget, so the token returned with this last page of results should be
+  // kInvalidNextPageToken.
+ EXPECT_THAT(page_result_info3.first, Eq(kInvalidNextPageToken));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos3.at(4)));
+
+ // Double check that next_page_token3 is not retrievable anymore.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(next_page_token3, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, ShouldStoreMaxNumberOfScoredDocumentHits) {
- ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
- /*num_per_page=*/2);
+TEST_F(ResultStateManagerTest,
+ AddingResultStateShouldEvictOverBudgetResultState) {
+ // Add a result state that is larger than the entire budget. The entire result
+  // state will still be cached.
+ auto [scored_document_hits1, document_protos1] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2,
+ /*document_id=*/3, /*document_id=*/4, /*document_id=*/5});
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/3,
- /*max_result_states=*/std::numeric_limits<int>::max());
+ ResultStateManager result_state_manager(/*max_total_hits=*/4,
+ document_store());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+  // Add a second result state. Because state1 + state2 together exceed the
+  // budget, state1 should be evicted.
+ auto [scored_document_hits2, document_protos2] =
+ AddScoredDocuments({/*document_id=*/6, /*document_id=*/7});
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info2,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+
+ // state1 should have been evicted and state2 should still be retrievable.
+ EXPECT_THAT(result_state_manager.GetNextPage(
+ page_result_info1.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- // The 5 input scored document hits will be truncated to 3.
+ ICING_ASSERT_OK_AND_ASSIGN(page_result_info2,
+ result_state_manager.GetNextPage(
+ page_result_info2.first, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info2.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info2.second.results.at(0).document(),
+ EqualsProto(document_protos2.at(1)));
+}
+
+TEST_F(ResultStateManagerTest,
+       AddingResultStateShouldNotBeTruncatedAfterFirstPage) {
+  // Add a result state that is larger than the entire budget, but fits within
+  // the budget once the first page has been returned. The remaining hits will
+  // still be cached and not truncated.
+ auto [scored_document_hits, document_protos] = AddScoredDocuments(
+ {/*document_id=*/0, /*document_id=*/1, /*document_id=*/2,
+ /*document_id=*/3, /*document_id=*/4});
+
+ ResultStateManager result_state_manager(/*max_total_hits=*/4,
+ document_store());
+
+ // The 5 input scored document hits will not be truncated. The first page of
+ // two hits will be returned immediately and the other three hits will fit
+ // within our caching budget.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
+ document_store(), result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
// First page, 2 results
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(2));
+ EXPECT_THAT(page_result_info1.second.results.at(0).document(),
+ EqualsProto(document_protos.at(0)));
+ EXPECT_THAT(page_result_info1.second.results.at(1).document(),
+ EqualsProto(document_protos.at(1)));
+
+ uint64_t next_page_token = page_result_info1.first;
+
+ // Second page, 2 results.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info2,
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info2.second.results, SizeIs(2));
+ EXPECT_THAT(page_result_info2.second.results.at(0).document(),
+ EqualsProto(document_protos.at(2)));
+ EXPECT_THAT(page_result_info2.second.results.at(1).document(),
+ EqualsProto(document_protos.at(3)));
+
+ // Third page, 1 result.
ICING_ASSERT_OK_AND_ASSIGN(
- PageResultState page_result_state1,
- result_state_manager.RankAndPaginate(std::move(original_result_state)));
+ PageResultInfo page_result_info3,
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info3.second.results, SizeIs(1));
+ EXPECT_THAT(page_result_info3.second.results.at(0).document(),
+ EqualsProto(document_protos.at(4)));
+
+ // Fourth page, 0 results.
EXPECT_THAT(
- page_result_state1.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
-
- uint64_t next_page_token = page_result_state1.next_page_token;
-
- // Second page, 1 results.
- ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state2,
- result_state_manager.GetNextPage(next_page_token));
- EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/3))));
-
- // No third page.
- EXPECT_THAT(result_state_manager.GetNextPage(next_page_token),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ result_state_manager.GetNextPage(next_page_token, result_retriever(),
+ clock()->GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
} // namespace
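The tests above all exercise the same invariant in ResultStateManager: every hit cached across all result states counts against max_total_hits, returning a page releases that page's hits, and caching a new state evicts the oldest states until the total fits again. Below is a minimal, self-contained sketch of that bookkeeping, not Icing's actual implementation: all names are illustrative, FIFO eviction is assumed, and truncation of an over-budget state's tail is omitted for brevity.

#include <algorithm>
#include <cstdint>
#include <deque>
#include <iostream>

struct ToyState {
  uint64_t token;
  int remaining_hits;  // hits still cached for this state
};

class ToyBudgetCache {
 public:
  explicit ToyBudgetCache(int max_total_hits)
      : max_total_hits_(max_total_hits) {}

  // Caches a state, evicting the oldest states until the budget is respected
  // again (the newly added state itself is never evicted here).
  void Add(ToyState state) {
    total_hits_ += state.remaining_hits;
    states_.push_back(state);
    while (total_hits_ > max_total_hits_ && states_.size() > 1) {
      total_hits_ -= states_.front().remaining_hits;
      states_.pop_front();  // the oldest state is evicted first
    }
  }

  // Consumes one page (up to page_size hits) for token and drops the state
  // once exhausted. Returns the number of hits consumed; 0 means NOT_FOUND.
  int GetNextPage(uint64_t token, int page_size) {
    for (auto it = states_.begin(); it != states_.end(); ++it) {
      if (it->token != token) continue;
      int consumed = std::min(page_size, it->remaining_hits);
      it->remaining_hits -= consumed;
      total_hits_ -= consumed;
      if (it->remaining_hits == 0) states_.erase(it);  // exhausted
      return consumed;
    }
    return 0;  // evicted, exhausted, or never cached
  }

  int total_hits() const { return total_hits_; }

 private:
  int max_total_hits_;
  int total_hits_ = 0;
  std::deque<ToyState> states_;  // front == oldest
};

int main() {
  // Mirrors GetNextPageShouldDecreaseCurrentHitsCount: a three-hit budget,
  // three one-hit states, one page retrieved, then a fourth state fits
  // without any eviction.
  ToyBudgetCache cache(/*max_total_hits=*/3);
  cache.Add({/*token=*/1, /*remaining_hits=*/1});
  cache.Add({/*token=*/2, /*remaining_hits=*/1});
  cache.Add({/*token=*/3, /*remaining_hits=*/1});
  cache.GetNextPage(/*token=*/1, /*page_size=*/1);  // frees one hit
  cache.Add({/*token=*/4, /*remaining_hits=*/1});   // fits: no eviction
  std::cout << cache.total_hits() << "\n";          // prints 3
}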
diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc
new file mode 100644
index 0000000..7e7e13c
--- /dev/null
+++ b/icing/result/result-state-manager_thread-safety_test.cc
@@ -0,0 +1,458 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <optional>
+#include <thread> // NOLINT
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/result/page-result.h"
+#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-manager.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Not;
+using ::testing::SizeIs;
+using PageResultInfo = std::pair<uint64_t, PageResult>;
+
+ResultSpecProto CreateResultSpec(int num_per_page) {
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+DocumentProto CreateDocument(int document_id) {
+ return DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri(std::to_string(document_id))
+ .SetSchema("Document")
+ .SetCreationTimestampMs(1574365086666 + document_id)
+ .SetScore(document_id)
+ .Build();
+}
+
+class ResultStateManagerThreadSafetyTest : public testing::Test {
+ protected:
+ ResultStateManagerThreadSafetyTest()
+ : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ clock_ = std::make_unique<FakeClock>();
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, clock_.get()));
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/10000));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult result,
+ DocumentStore::Create(
+ &filesystem_, test_dir_, clock_.get(), schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ result_retriever_, ResultRetrieverV2::Create(
+ document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ clock_.reset();
+ }
+
+ Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<FakeClock> clock_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<ResultRetrieverV2> result_retriever_;
+};
+
+TEST_F(ResultStateManagerThreadSafetyTest,
+ RequestSameResultStateSimultaneously) {
+ // Create several threads to send GetNextPage requests with the same
+ // ResultState.
+ //
+  // This test verifies the per-instance lock on ResultState. Only one thread
+  // is allowed to access a ResultState at a time, so there should be no crash
+  // and the result documents within a single page should be contiguous (i.e.
+  // no interleaving).
+
+ // Prepare documents.
+ constexpr int kNumDocuments = 10000;
+ std::vector<ScoredDocumentHit> scored_document_hits;
+ for (int i = 0; i < kNumDocuments; ++i) {
+ // Put a document with id and score = i.
+ ICING_ASSERT_OK(document_store_->Put(CreateDocument(/*document_id=*/i)));
+ scored_document_hits.push_back(
+ ScoredDocumentHit(/*document_id=*/i, kSectionIdMaskNone, /*score=*/i));
+ }
+
+ constexpr int kNumPerPage = 100;
+ ResultStateManager result_state_manager(/*max_total_hits=*/kNumDocuments,
+ *document_store_);
+
+ // Retrieve the first page.
+ // Documents are ordered by score *ascending*, so the first page should
+ // contain documents with scores [0, 1, 2, ..., kNumPerPage - 1].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(kNumPerPage), *document_store_, *result_retriever_,
+ clock_->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage));
+ for (int i = 0; i < kNumPerPage; ++i) {
+ ASSERT_THAT(page_result_info1.second.results[i].score(), Eq(i));
+ }
+
+ uint64_t next_page_token = page_result_info1.first;
+ ASSERT_THAT(next_page_token, Not(Eq(kInvalidNextPageToken)));
+
+ // Create kNumThreads threads to call GetNextPage() with the same token at the
+ // same time. Each thread should get a valid result.
+  // Use page_results to store each thread's result.
+ constexpr int kNumThreads = 50;
+ std::vector<std::optional<PageResultInfo>> page_results(kNumThreads);
+ auto callable = [&](int thread_id) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(),
+ normalizer_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info,
+ result_state_manager.GetNextPage(next_page_token, *result_retriever,
+ clock_->GetSystemTimeMilliseconds()));
+ page_results[thread_id] =
+ std::make_optional<PageResultInfo>(std::move(page_result_info));
+ };
+
+ // Spawn threads for GetNextPage().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(page_results[i], Not(Eq(std::nullopt)));
+ EXPECT_THAT(page_results[i]->second.results, SizeIs(kNumPerPage));
+ }
+
+  // Since ResultState has a per-instance lock, only one thread is allowed to
+  // access it at a time. Therefore, every thread should get consecutive scores
+  // instead of interleaved scores, regardless of the execution order. In other
+  // words, within a particular page the scores of all results should be
+  // ordered as [N, N+1, N+2, N+3, ...], where N depends on the execution
+  // order. Also there should be no crash.
+ std::vector<int> first_doc_scores;
+ for (const auto& page_result_info : page_results) {
+ first_doc_scores.push_back(page_result_info->second.results[0].score());
+ for (int i = 1; i < kNumPerPage; ++i) {
+ EXPECT_THAT(page_result_info->second.results[i].score(),
+ Eq(page_result_info->second.results[i - 1].score() + 1));
+ }
+ }
+
+  // Verify that the first doc score of every page result is correct: they
+  // should be kNumPerPage * 1, kNumPerPage * 2, ..., etc.
+ // Note: the first score of the first page retrieved via GetNextPage should be
+ // kNumPerPage because the *actual* first page with first score = 0 was
+ // retrieved during CacheAndRetrieveFirstPage.
+ std::sort(first_doc_scores.begin(), first_doc_scores.end());
+ for (int i = 0; i < kNumThreads; ++i) {
+ EXPECT_THAT(first_doc_scores[i], Eq(kNumPerPage * (i + 1)));
+ }
+}
+
+TEST_F(ResultStateManagerThreadSafetyTest, InvalidateResultStateWhileUsing) {
+ // Create several threads to send GetNextPage requests with the same
+ // ResultState and another single thread to invalidate this ResultState.
+ //
+ // This test verifies the usage of std::shared_ptr. Even after invalidating
+ // the original copy of std::shared_ptr in the cache, the ResultState instance
+ // should be still valid and no crash should occur in threads that are still
+ // holding a copy of std::shared_ptr pointing to the same ResultState
+ // instance.
+
+ // Prepare documents.
+ constexpr int kNumDocuments = 10000;
+ std::vector<ScoredDocumentHit> scored_document_hits;
+ for (int i = 0; i < kNumDocuments; ++i) {
+ // Put a document with id and score = i.
+ ICING_ASSERT_OK(document_store_->Put(CreateDocument(/*document_id=*/i)));
+ scored_document_hits.push_back(
+ ScoredDocumentHit(/*document_id=*/i, kSectionIdMaskNone, /*score=*/i));
+ }
+
+ constexpr int kNumPerPage = 100;
+ ResultStateManager result_state_manager(/*max_total_hits=*/kNumDocuments,
+ *document_store_);
+
+ // Retrieve the first page.
+ // Documents are ordered by score *ascending*, so the first page should
+ // contain documents with scores [0, 1, 2, ..., kNumPerPage - 1].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(kNumPerPage), *document_store_, *result_retriever_,
+ clock_->GetSystemTimeMilliseconds()));
+ ASSERT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage));
+ for (int i = 0; i < kNumPerPage; ++i) {
+ ASSERT_THAT(page_result_info1.second.results[i].score(), Eq(i));
+ }
+
+ uint64_t next_page_token = page_result_info1.first;
+ ASSERT_THAT(next_page_token, Not(Eq(kInvalidNextPageToken)));
+
+ // Create kNumThreads threads to call GetNextPage() with the same token at the
+  // same time. The ResultState might have been invalidated by then, so it is
+  // normal to get a NOT_FOUND error.
+  // Use page_results to store each thread's result.
+ constexpr int kNumThreads = 50;
+ std::vector<std::optional<PageResultInfo>> page_results(kNumThreads);
+ auto callable = [&](int thread_id) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(),
+ normalizer_.get()));
+
+ libtextclassifier3::StatusOr<PageResultInfo> page_result_info_or =
+ result_state_manager.GetNextPage(next_page_token, *result_retriever,
+ clock_->GetSystemTimeMilliseconds());
+ if (page_result_info_or.ok()) {
+ page_results[thread_id] = std::make_optional<PageResultInfo>(
+ std::move(page_result_info_or).ValueOrDie());
+ } else {
+ EXPECT_THAT(page_result_info_or,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ }
+ };
+
+ // Spawn threads for GetNextPage().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Spawn another single thread to invalidate the ResultState.
+ std::thread invalidating_thread([&]() -> void {
+ result_state_manager.InvalidateResultState(next_page_token);
+ });
+
+ // Join threads.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (page_results[i] != std::nullopt) {
+ EXPECT_THAT(page_results[i]->second.results, SizeIs(kNumPerPage));
+ }
+ }
+ invalidating_thread.join();
+
+ // Threads fetching ResultState before invalidation will get normal results,
+  // while the others will get a NOT_FOUND error.
+ std::vector<int> first_doc_scores;
+ for (const auto& page_result_info : page_results) {
+ if (page_result_info == std::nullopt) {
+ continue;
+ }
+
+ first_doc_scores.push_back(page_result_info->second.results[0].score());
+ for (int i = 1; i < kNumPerPage; ++i) {
+ EXPECT_THAT(page_result_info->second.results[i].score(),
+ Eq(page_result_info->second.results[i - 1].score() + 1));
+ }
+ }
+
+  // Verify that the first doc score of every page result is correct: they
+  // should be kNumPerPage * 1, kNumPerPage * 2, ..., etc.
+ std::sort(first_doc_scores.begin(), first_doc_scores.end());
+ for (int i = 0; i < first_doc_scores.size(); ++i) {
+ EXPECT_THAT(first_doc_scores[i], Eq(kNumPerPage * (i + 1)));
+ }
+
+  // Verify that num_total_hits has been decremented correctly.
+ EXPECT_THAT(result_state_manager.num_total_hits(), Eq(0));
+}
+
+TEST_F(ResultStateManagerThreadSafetyTest, MultipleResultStates) {
+ // Create several threads to send GetNextPage requests with different
+ // ResultStates.
+ //
+  // This test verifies that each ResultState works independently and
+  // correctly in its own thread. It also verifies that there is no race
+  // condition on num_total_hits, which is incremented/decremented by multiple
+  // threads.
+
+ // Prepare documents.
+ constexpr int kNumDocuments = 2000;
+ std::vector<ScoredDocumentHit> scored_document_hits;
+ for (int i = 0; i < kNumDocuments; ++i) {
+ // Put a document with id and score = i.
+ ICING_ASSERT_OK(document_store_->Put(CreateDocument(/*document_id=*/i)));
+ scored_document_hits.push_back(
+ ScoredDocumentHit(/*document_id=*/i, kSectionIdMaskNone, /*score=*/i));
+ }
+
+ constexpr int kNumThreads = 50;
+ constexpr int kNumPerPage = 30;
+ ResultStateManager result_state_manager(
+ /*max_total_hits=*/kNumDocuments * kNumThreads, *document_store_);
+
+ // Create kNumThreads threads to:
+ // - Call CacheAndRetrieveFirstPage() once to create its own ResultState.
+ // - Call GetNextPage() on its own ResultState for thread_id times.
+ //
+ // Each thread will get (thread_id + 1) pages, i.e. kNumPerPage *
+ // (thread_id + 1) docs.
+ ASSERT_THAT(kNumDocuments, Ge(kNumPerPage * kNumThreads));
+ auto callable = [&](int thread_id) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(),
+ normalizer_.get()));
+
+ // Retrieve the first page.
+ // Documents are ordered by score *ascending*, so the first page should
+ // contain documents with scores [0, 1, 2, ..., kNumPerPage - 1].
+ std::vector<ScoredDocumentHit> scored_document_hits_copy(
+ scored_document_hits);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultInfo page_result_info1,
+ result_state_manager.CacheAndRetrieveFirstPage(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits_copy), /*is_descending=*/false),
+ /*parent_adjustment_info=*/nullptr,
+ /*child_adjustment_info=*/nullptr, CreateResultSpec(kNumPerPage),
+ *document_store_, *result_retriever,
+ clock_->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage));
+ for (int i = 0; i < kNumPerPage; ++i) {
+ EXPECT_THAT(page_result_info1.second.results[i].score(), Eq(i));
+ }
+
+ uint64_t next_page_token = page_result_info1.first;
+ ASSERT_THAT(next_page_token, Not(Eq(kInvalidNextPageToken)));
+
+    // Retrieve some of the subsequent pages. We use thread_id as the number
+    // of subsequent pages to retrieve (i.e. how many times GetNextPage is
+    // called) in each thread, in order to:
+ // - Vary the number of pages that we're retrieving in each thread.
+ // - Still make the total number of hits remaining (num_total_hits) a
+ // predictable number.
+ // Then, including the first page (retrieved by CacheAndRetrieveFirstPage),
+ // each thread should retrieve 1, 2, 3, ..., kNumThreads pages.
+ int num_subsequent_pages_to_retrieve = thread_id;
+ for (int i = 0; i < num_subsequent_pages_to_retrieve; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(PageResultInfo page_result_info,
+ result_state_manager.GetNextPage(
+ next_page_token, *result_retriever,
+ clock_->GetSystemTimeMilliseconds()));
+ EXPECT_THAT(page_result_info.second.results, SizeIs(kNumPerPage));
+ for (int j = 0; j < kNumPerPage; ++j) {
+ EXPECT_THAT(page_result_info.second.results[j].score(),
+ Eq(kNumPerPage * (i + 1) + j));
+ }
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ }
+
+  // kNumThreads * kNumDocuments ScoredDocumentHits are created at the
+  // beginning, and kNumPerPage * (1 + 2 + ... + kNumThreads) docs are returned
+  // by retrieval, since the threads retrieve 1, 2, 3, ..., kNumThreads pages
+  // respectively. All retrieved ScoredDocumentHits should be removed from the
+  // cache and num_total_hits should be decremented accordingly: with the
+  // constants above, 50 * 2000 - 30 * (50 * 51 / 2) = 100000 - 38250 = 61750
+  // hits should remain.
+ int expected_remaining_hits =
+ kNumThreads * kNumDocuments -
+ kNumPerPage * (kNumThreads * (kNumThreads + 1) / 2);
+ EXPECT_THAT(result_state_manager.num_total_hits(),
+ Eq(expected_remaining_hits));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
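The InvalidateResultStateWhileUsing test above leans on a standard std::shared_ptr property: erasing the cache's copy of a shared_ptr only drops one reference, so a thread that already took its own copy can keep using the object safely. A minimal sketch of that pattern with toy types (this is not the actual ResultStateManager cache):

#include <cstdint>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>

int main() {
  std::unordered_map<uint64_t, std::shared_ptr<int>> cache;
  std::mutex mutex;
  cache[42] = std::make_shared<int>(7);

  std::thread reader([&] {
    std::shared_ptr<int> mine;
    {
      std::lock_guard<std::mutex> lock(mutex);
      auto it = cache.find(42);
      if (it != cache.end()) mine = it->second;  // take our own copy
    }
    // Even if the cache entry is erased at this point, *mine stays valid
    // because our copy keeps the reference count above zero.
    if (mine != nullptr) std::cout << *mine << "\n";
  });

  std::thread invalidator([&] {
    std::lock_guard<std::mutex> lock(mutex);
    cache.erase(42);  // drops only the cache's reference
  });

  reader.join();
  invalidator.join();
}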
diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc
new file mode 100644
index 0000000..3aa9359
--- /dev/null
+++ b/icing/result/result-state-v2.cc
@@ -0,0 +1,84 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-state-v2.h"
+
+#include <atomic>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/proto/search.pb.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+ResultStateV2::ResultStateV2(
+ std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store)
+ : scored_document_hits_ranker(std::move(scored_document_hits_ranker_in)),
+ num_returned(0),
+ parent_adjustment_info_(std::move(parent_adjustment_info)),
+ child_adjustment_info_(std::move(child_adjustment_info)),
+ num_per_page_(result_spec.num_per_page()),
+ num_total_bytes_per_page_threshold_(
+ result_spec.num_total_bytes_per_page_threshold()),
+ max_joined_children_per_parent_to_return_(
+ result_spec.max_joined_children_per_parent_to_return()),
+ num_total_hits_(nullptr),
+ result_group_type_(result_spec.result_group_type()) {
+ for (const ResultSpecProto::ResultGrouping& result_grouping :
+ result_spec.result_groupings()) {
+ int group_id = group_result_limits.size();
+ group_result_limits.push_back(result_grouping.max_results());
+ for (const ResultSpecProto::ResultGrouping::Entry& entry :
+ result_grouping.entry_groupings()) {
+ const std::string& name_space = entry.namespace_();
+ const std::string& schema = entry.schema();
+ auto entry_id_or = document_store.GetResultGroupingEntryId(
+ result_group_type_, name_space, schema);
+ if (!entry_id_or.ok()) {
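+        // Skip entries whose namespace/schema pair has no result grouping
+        // entry id, e.g. namespaces that don't exist in the document store.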
+ continue;
+ }
+ int32_t entry_id = entry_id_or.ValueOrDie();
+ entry_id_group_id_map_.insert({entry_id, group_id});
+ }
+ }
+}
+
+ResultStateV2::~ResultStateV2() {
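+  // Give this state's remaining hits back to the global counter (a no-op if
+  // RegisterNumTotalHits was never called).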
+ IncrementNumTotalHits(-1 * scored_document_hits_ranker->size());
+}
+
+void ResultStateV2::RegisterNumTotalHits(std::atomic<int>* num_total_hits) {
+ // Decrement the original num_total_hits_ before registering a new one.
+ IncrementNumTotalHits(-1 * scored_document_hits_ranker->size());
+ num_total_hits_ = num_total_hits;
+ IncrementNumTotalHits(scored_document_hits_ranker->size());
+}
+
+void ResultStateV2::IncrementNumTotalHits(int increment_by) {
+ if (num_total_hits_ != nullptr) {
+ *num_total_hits_ += increment_by;
+ }
+}
+
+} // namespace lib
+} // namespace icing
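RegisterNumTotalHits, IncrementNumTotalHits, and the destructor above form a small accounting contract: a state adds its ranker's size to a shared atomic counter on registration and gives it back on re-registration or destruction. A reduced sketch of the same RAII pattern with illustrative names (not Icing code):

#include <atomic>
#include <iostream>

class Counted {
 public:
  // Adds this object's size to the shared counter on construction.
  Counted(int size, std::atomic<int>* total) : size_(size), total_(total) {
    if (total_ != nullptr) *total_ += size_;
  }

  // Gives the size back on destruction, mirroring the constructor.
  ~Counted() {
    if (total_ != nullptr) *total_ -= size_;
  }

 private:
  int size_;
  std::atomic<int>* total_;  // not owned
};

int main() {
  std::atomic<int> total(0);
  {
    Counted a(/*size=*/5, &total);
    Counted b(/*size=*/2, &total);
    std::cout << total.load() << "\n";  // prints 7
  }
  std::cout << total.load() << "\n";  // prints 0 after both destructors run
}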
diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h
new file mode 100644
index 0000000..919710e
--- /dev/null
+++ b/icing/result/result-state-v2.h
@@ -0,0 +1,175 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RESULT_STATE_V2_H_
+#define ICING_RESULT_RESULT_STATE_V2_H_
+
+#include <atomic>
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/thread_annotations.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/result-adjustment-info.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Used to hold information needed across multiple pagination requests of the
+// same query. Stored in ResultStateManager.
+class ResultStateV2 {
+ public:
+ explicit ResultStateV2(
+ std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in,
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
+ const ResultSpecProto& result_spec, const DocumentStore& document_store);
+
+ ~ResultStateV2();
+
+  // Registers num_total_hits and adds the current
+  // scored_document_hits_ranker.size() to it. When re-registering, the size is
+  // first subtracted from the previously registered counter.
+ void RegisterNumTotalHits(std::atomic<int>* num_total_hits)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);
+
+ // Increment the global counter num_total_hits_ by increment_by, if
+ // num_total_hits_ has been registered (is not nullptr).
+ // Note that providing a negative value for increment_by is a valid usage,
+ // which will actually decrement num_total_hits_.
+ //
+  // It must be called whenever scored_document_hits_ranker changes.
+ void IncrementNumTotalHits(int increment_by)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex);
+
+ // Returns a nullable pointer to parent adjustment info.
+ ResultAdjustmentInfo* parent_adjustment_info()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex) {
+ return parent_adjustment_info_.get();
+ }
+
+ // Returns a nullable pointer to parent adjustment info.
+ const ResultAdjustmentInfo* parent_adjustment_info() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return parent_adjustment_info_.get();
+ }
+
+ // Returns a nullable pointer to child adjustment info.
+ ResultAdjustmentInfo* child_adjustment_info()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex) {
+ return child_adjustment_info_.get();
+ }
+
+ // Returns a nullable pointer to child adjustment info.
+ const ResultAdjustmentInfo* child_adjustment_info() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return child_adjustment_info_.get();
+ }
+
+ const std::unordered_map<int32_t, int>& entry_id_group_id_map() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return entry_id_group_id_map_;
+ }
+
+ int32_t num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return num_per_page_;
+ }
+
+ int32_t num_total_bytes_per_page_threshold() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return num_total_bytes_per_page_threshold_;
+ }
+
+ int32_t max_joined_children_per_parent_to_return() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return max_joined_children_per_parent_to_return_;
+ }
+
+ ResultSpecProto::ResultGroupingType result_group_type()
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return result_group_type_;
+ }
+
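+  // Guards all members below. Exposed publicly so that callers can hold the
+  // lock across multiple accessor calls, as the annotations below require.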
+ absl_ports::shared_mutex mutex;
+
+  // When evaluating the next top K hits from scored_document_hits_ranker, some
+  // of them may be filtered out by group_result_limits and won't be returned
+  // to the client, so they shouldn't be counted in num_returned. Also, the
+  // logic of group result limiting depends on retrieval, so it is impossible
+  // for ResultState itself to correctly modify these fields. Thus, we make
+  // them public so that users of this class can modify them directly.
+
+ // The scored document hits ranker.
+ std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker
+ ICING_GUARDED_BY(mutex);
+
+ // The count of remaining results to return for a group where group id is the
+ // index.
+ std::vector<int> group_result_limits ICING_GUARDED_BY(mutex);
+
+ // Number of results that have already been returned.
+ int num_returned ICING_GUARDED_BY(mutex);
+
+ private:
+ // Adjustment information for parent documents, including snippet and
+ // projection. Can be nullptr if there is no adjustment info for parent
+ // documents.
+ std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info_
+ ICING_GUARDED_BY(mutex);
+
+ // Adjustment information for child documents, including snippet and
+ // projection. This is only used for join query. Can be nullptr if there is no
+ // adjustment info for child documents.
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info_
+ ICING_GUARDED_BY(mutex);
+
+ // A map between result grouping entry id and the id of the group that it
+ // appears in.
+ std::unordered_map<int32_t, int> entry_id_group_id_map_
+ ICING_GUARDED_BY(mutex);
+
+ // Number of results to return in each page.
+ int32_t num_per_page_ ICING_GUARDED_BY(mutex);
+
+  // The cutoff threshold for the total bytes of all documents in a single
+  // page, used to limit the number of bytes returned per page. Note that it
+  // doesn't guarantee the resulting number of bytes will be smaller than,
+  // equal to, or larger than the threshold; it is just a cutoff point and only
+  // guarantees that the total bytes of search results won't exceed the
+  // threshold by too much.
+ int32_t num_total_bytes_per_page_threshold_ ICING_GUARDED_BY(mutex);
+
+  // Max # of joined child documents to attach to each parent document in the
+  // result.
+ int32_t max_joined_children_per_parent_to_return_ ICING_GUARDED_BY(mutex);
+
+ // Pointer to a global counter to sum up the size of scored_document_hits in
+ // all ResultStates.
+ // Does not own.
+ std::atomic<int>* num_total_hits_ ICING_GUARDED_BY(mutex);
+
+  // The value that the search results are grouped by.
+ ResultSpecProto::ResultGroupingType result_group_type_
+ ICING_GUARDED_BY(mutex);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_RESULT_STATE_V2_H_
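A small usage sketch of the locking convention this header encodes: callers take ResultStateV2's public mutex themselves, shared for const accessors and exclusive for mutation. Toy standard-library types stand in below; the ICING_* macros are assumed to wrap the corresponding Clang thread-safety attributes.

#include <iostream>
#include <shared_mutex>

struct ToyResultState {
  mutable std::shared_mutex mutex;  // public, like ResultStateV2::mutex
  int num_returned = 0;             // conceptually guarded by mutex
};

// A const accessor only needs a shared (read) lock.
int ReadNumReturned(const ToyResultState& state) {
  std::shared_lock<std::shared_mutex> lock(state.mutex);
  return state.num_returned;
}

// Mutation requires an exclusive (write) lock.
void RecordReturned(ToyResultState& state, int n) {
  std::unique_lock<std::shared_mutex> lock(state.mutex);
  state.num_returned += n;
}

int main() {
  ToyResultState state;
  RecordReturned(state, 5);
  std::cout << ReadNumReturned(state) << "\n";  // prints 5
}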
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc
new file mode 100644
index 0000000..0f88023
--- /dev/null
+++ b/icing/result/result-state-v2_test.cc
@@ -0,0 +1,409 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/result-state-v2.h"
+
+#include <atomic>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+ResultSpecProto CreateResultSpec(
+ int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
+ ResultSpecProto result_spec;
+ result_spec.set_result_group_type(result_group_type);
+ result_spec.set_num_per_page(num_per_page);
+ return result_spec;
+}
+
+class ResultStateV2Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ schema_store_base_dir_ = GetTestTempDir() + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ doc_store_base_dir_ = GetTestTempDir() + "/document_store";
+ filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_base_dir_, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(result.document_store);
+
+ num_total_hits_ = 0;
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str());
+ filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str());
+ }
+
+ ScoredDocumentHit AddScoredDocument(DocumentId document_id) {
+ DocumentProto document;
+ document.set_namespace_("namespace");
+ document.set_uri(std::to_string(document_id));
+ document.set_schema("Document");
+ document_store_->Put(std::move(document));
+ return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
+ }
+
+ DocumentStore& document_store() { return *document_store_; }
+
+ std::atomic<int>& num_total_hits() { return num_total_hits_; }
+
+ const std::atomic<int>& num_total_hits() const { return num_total_hits_; }
+
+ private:
+ Filesystem filesystem_;
+ std::string doc_store_base_dir_;
+ std::string schema_store_base_dir_;
+ Clock clock_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::atomic<int> num_total_hits_;
+};
+
+TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) {
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_num_total_bytes_per_page_threshold(4096);
+ result_spec.set_max_joined_children_per_parent_to_return(2048);
+
+ // Adjustment info is not important in this test.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::vector<ScoredDocumentHit>(), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ EXPECT_THAT(result_state.num_returned, Eq(0));
+ EXPECT_THAT(result_state.num_per_page(), Eq(result_spec.num_per_page()));
+ EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(),
+ Eq(result_spec.num_total_bytes_per_page_threshold()));
+ EXPECT_THAT(result_state.max_joined_children_per_parent_to_return(),
+ Eq(result_spec.max_joined_children_per_parent_to_return()));
+}
+
+TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) {
+ ResultSpecProto default_result_spec = ResultSpecProto::default_instance();
+ ASSERT_THAT(default_result_spec.num_per_page(), Eq(10));
+ ASSERT_THAT(default_result_spec.num_total_bytes_per_page_threshold(),
+ Eq(std::numeric_limits<int32_t>::max()));
+
+ // Adjustment info is not important in this test.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::vector<ScoredDocumentHit>(),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ default_result_spec, document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ EXPECT_THAT(result_state.num_returned, Eq(0));
+ EXPECT_THAT(result_state.num_per_page(),
+ Eq(default_result_spec.num_per_page()));
+ EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(),
+ Eq(default_result_spec.num_total_bytes_per_page_threshold()));
+ EXPECT_THAT(
+ result_state.max_joined_children_per_parent_to_return(),
+ Eq(default_result_spec.max_joined_children_per_parent_to_return()));
+}
+
+TEST_F(ResultStateV2Test,
+ ShouldConstructNamespaceGroupIdMapAndGroupResultLimitsAccordingToSpecs) {
+ // Create 3 docs under namespace1, namespace2, namespace3.
+ DocumentProto document1;
+ document1.set_namespace_("namespace1");
+ document1.set_uri("uri/1");
+ document1.set_schema("Document");
+ ICING_ASSERT_OK(document_store().Put(std::move(document1)));
+
+ DocumentProto document2;
+ document2.set_namespace_("namespace2");
+ document2.set_uri("uri/2");
+ document2.set_schema("Document");
+ ICING_ASSERT_OK(document_store().Put(std::move(document2)));
+
+ DocumentProto document3;
+ document3.set_namespace_("namespace3");
+ document3.set_uri("uri/3");
+ document3.set_schema("Document");
+ ICING_ASSERT_OK(document_store().Put(std::move(document3)));
+
+ // Create a ResultSpec that limits "namespace1" to 3 results and limits
+ // "namespace2"+"namespace3" to a total of 2 results. Also add
+ // "nonexistentNamespace1" and "nonexistentNamespace2" to test the behavior.
+ ResultSpecProto::ResultGroupingType result_grouping_type =
+ ResultSpecProto::NAMESPACE;
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/5, result_grouping_type);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ ResultSpecProto::ResultGrouping::Entry* entry =
+ result_grouping->add_entry_groupings();
+ result_grouping->set_max_results(3);
+ entry->set_namespace_("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(5);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("nonexistentNamespace2");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace2");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("namespace3");
+ entry = result_grouping->add_entry_groupings();
+ entry->set_namespace_("nonexistentNamespace1");
+
+ // Get entry ids.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ int32_t entry_id1, document_store().GetResultGroupingEntryId(
+ result_grouping_type, "namespace1", "Document"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ int32_t entry_id2, document_store().GetResultGroupingEntryId(
+ result_grouping_type, "namespace2", "Document"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ int32_t entry_id3, document_store().GetResultGroupingEntryId(
+ result_grouping_type, "namespace3", "Document"));
+
+ // Adjustment info is not important in this test.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::vector<ScoredDocumentHit>(),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, document_store());
+
+ absl_ports::shared_lock l(&result_state.mutex);
+
+ // "namespace1" should be in group 0, and "namespace2"+"namespace3" should be
+ // in group 2.
+ // "nonexistentNamespace1" and "nonexistentNamespace2" shouldn't exist.
+ EXPECT_THAT(result_state.entry_id_group_id_map(),
+ UnorderedElementsAre(Pair(entry_id1, 0), Pair(entry_id2, 2),
+ Pair(entry_id3, 2)));
+
+ // group_result_limits should contain 3 (at index 0 for group 0), 5 (at index
+ // 1 for group 1), 2 (at index 2 for group 2), even though there is no valid
+ // namespace in group 1.
+ EXPECT_THAT(result_state.group_result_limits, ElementsAre(3, 5, 2));
+}
+
+TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHits) {
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ // Adjustment info is not important in this test.
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
+ document_store());
+
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ EXPECT_THAT(num_total_hits(), Eq(0));
+ result_state.RegisterNumTotalHits(&num_total_hits());
+ EXPECT_THAT(num_total_hits(), Eq(5));
+ result_state.IncrementNumTotalHits(500);
+ EXPECT_THAT(num_total_hits(), Eq(505));
+}
+
+TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) {
+ std::vector<ScoredDocumentHit> scored_document_hits1 = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ std::vector<ScoredDocumentHit> scored_document_hits2 = {
+ AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/5)};
+
+ num_total_hits() = 2;
+ {
+ // Adjustment info is not important in this test.
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state1(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits1),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
+ document_store());
+
+ absl_ports::unique_lock l(&result_state1.mutex);
+
+ result_state1.RegisterNumTotalHits(&num_total_hits());
+ ASSERT_THAT(num_total_hits(), Eq(7));
+
+ {
+ // Adjustment info is not important in this test.
+ // Creates another ResultState with 2 ScoredDocumentHits.
+ ResultStateV2 result_state2(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits2),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
+ document_store());
+
+ absl_ports::unique_lock l(&result_state2.mutex);
+
+ result_state2.RegisterNumTotalHits(&num_total_hits());
+ ASSERT_THAT(num_total_hits(), Eq(9));
+ }
+
+ EXPECT_THAT(num_total_hits(), Eq(7));
+ }
+ EXPECT_THAT(num_total_hits(), Eq(2));
+}
+
+TEST_F(ResultStateV2Test, ShouldNotUpdateNumTotalHitsWhenNotRegistered) {
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ {
+ // Adjustment info is not important in this test.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
+ document_store());
+
+ {
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ EXPECT_THAT(num_total_hits(), Eq(0));
+ result_state.IncrementNumTotalHits(500);
+ EXPECT_THAT(num_total_hits(), Eq(0));
+ }
+ }
+ EXPECT_THAT(num_total_hits(), Eq(0));
+}
+
+TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) {
+ std::atomic<int> another_num_total_hits = 11;
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/3)};
+
+ // Adjustment info is not important in this test.
+ // Creates a ResultState with 5 ScoredDocumentHits.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits),
+ /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE),
+ document_store());
+
+ absl_ports::unique_lock l(&result_state.mutex);
+
+ num_total_hits() = 7;
+ result_state.RegisterNumTotalHits(&num_total_hits());
+ EXPECT_THAT(num_total_hits(), Eq(12));
+
+ result_state.RegisterNumTotalHits(&another_num_total_hits);
+ // The original num_total_hits should be decremented after re-registration.
+ EXPECT_THAT(num_total_hits(), Eq(7));
+ // another_num_total_hits should be incremented after re-registration.
+ EXPECT_THAT(another_num_total_hits, Eq(16));
+
+ result_state.IncrementNumTotalHits(500);
+ // The original num_total_hits should be unchanged.
+ EXPECT_THAT(num_total_hits(), Eq(7));
+ // Increment should be done on another_num_total_hits.
+ EXPECT_THAT(another_num_total_hits, Eq(516));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc
deleted file mode 100644
index bf28f52..0000000
--- a/icing/result/result-state.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/result/result-state.h"
-
-#include "icing/scoring/ranker.h"
-#include "icing/util/logging.h"
-
-namespace icing {
-namespace lib {
-
-SnippetContext CreateSnippetContext(SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec,
- const ResultSpecProto& result_spec) {
- if (result_spec.snippet_spec().num_to_snippet() > 0 &&
- result_spec.snippet_spec().num_matches_per_property() > 0) {
- // Needs snippeting
- return SnippetContext(std::move(query_terms), result_spec.snippet_spec(),
- search_spec.term_match_type());
- }
- return SnippetContext(/*query_terms_in=*/{},
- ResultSpecProto::SnippetSpecProto::default_instance(),
- TermMatchType::UNKNOWN);
-}
-
-ResultState::ResultState(std::vector<ScoredDocumentHit> scored_document_hits,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec)
- : scored_document_hits_(std::move(scored_document_hits)),
- snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec,
- result_spec)),
- num_per_page_(result_spec.num_per_page()),
- num_returned_(0),
- scored_document_hit_comparator_(scoring_spec.order_by() ==
- ScoringSpecProto::Order::DESC) {
- BuildHeapInPlace(&scored_document_hits_, scored_document_hit_comparator_);
-}
-
-std::vector<ScoredDocumentHit> ResultState::GetNextPage() {
- std::vector<ScoredDocumentHit> scored_document_hits = PopTopResultsFromHeap(
- &scored_document_hits_, num_per_page_, scored_document_hit_comparator_);
- num_returned_ += scored_document_hits.size();
- return scored_document_hits;
-}
-
-void ResultState::TruncateHitsTo(int new_size) {
- if (new_size < 0 || scored_document_hits_.size() <= new_size) {
- return;
- }
-
- // Copying the best new_size results.
- scored_document_hits_ = PopTopResultsFromHeap(
- &scored_document_hits_, new_size, scored_document_hit_comparator_);
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/result/result-state.h b/icing/result/result-state.h
deleted file mode 100644
index 82e783b..0000000
--- a/icing/result/result-state.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_RESULT_RESULT_STATE_H_
-#define ICING_RESULT_RESULT_STATE_H_
-
-#include <vector>
-
-#include "icing/proto/scoring.pb.h"
-#include "icing/proto/search.pb.h"
-#include "icing/result/snippet-context.h"
-#include "icing/scoring/scored-document-hit.h"
-
-namespace icing {
-namespace lib {
-
-// Used to hold information needed across multiple pagination requests of the
-// same query. Stored in ResultStateManager.
-class ResultState {
- public:
- explicit ResultState(std::vector<ScoredDocumentHit> scored_document_hits,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec);
-
- // Returns the next page of results. The size of page is passed in from
- // ResultSpecProto in constructor. Calling this method could increase the
- // value of num_returned(), so be careful of the order of calling these
- // methods.
- std::vector<ScoredDocumentHit> GetNextPage();
-
- // Truncates the vector of ScoredDocumentHits to the given size. The best
- // ScoredDocumentHits are kept.
- void TruncateHitsTo(int new_size);
-
- // Returns if the current state has more results to return.
- bool HasMoreResults() const { return !scored_document_hits_.empty(); }
-
- // Returns a SnippetContext generated from the specs passed in via
- // constructor.
- const SnippetContext& snippet_context() const { return snippet_context_; }
-
- // The number of results that have already been returned. This number is
- // increased when GetNextPage() is called.
- int num_returned() const { return num_returned_; }
-
- private:
- // The scored document hits. It represents a heap data structure when ranking
- // is required so that we can get top K hits in O(KlgN) time. If no ranking is
- // required, it's just a vector of ScoredDocumentHits in the original order.
- std::vector<ScoredDocumentHit> scored_document_hits_;
-
- // Information needed for snippeting.
- SnippetContext snippet_context_;
-
- // Number of results to return in each page.
- int num_per_page_;
-
- // Number of results that have already been returned.
- int num_returned_;
-
- // Used to compare two scored document hits.
- ScoredDocumentHitComparator scored_document_hit_comparator_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_RESULT_RESULT_STATE_H_
diff --git a/icing/result/result-state_test.cc b/icing/result/result-state_test.cc
deleted file mode 100644
index 85cb242..0000000
--- a/icing/result/result-state_test.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/result/result-state.h"
-
-#include "gtest/gtest.h"
-#include "icing/portable/equals-proto.h"
-#include "icing/scoring/scored-document-hit.h"
-#include "icing/testing/common-matchers.h"
-
-namespace icing {
-namespace lib {
-namespace {
-using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::IsEmpty;
-
-ScoredDocumentHit CreateScoredDocumentHit(DocumentId document_id) {
- return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
-}
-
-SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(match_type);
- return search_spec;
-}
-
-ScoringSpecProto CreateScoringSpec(bool is_descending_order) {
- ScoringSpecProto scoring_spec;
- scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC
- : ScoringSpecProto::Order::ASC);
- return scoring_spec;
-}
-
-ResultSpecProto CreateResultSpec(int num_per_page) {
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(num_per_page);
- return result_spec;
-}
-
-// ResultState::ResultState() and ResultState::GetNextPage() are calling
-// Ranker::BuildHeapInPlace() and Ranker::PopTopResultsFromHeap() directly, so
-// we don't need to test much on what order is returned as that is tested in
-// Ranker's tests. Here we just need one sanity test to make sure that the
-// correct functions are called.
-TEST(ResultStateTest, ShouldReturnNextPage) {
- std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
-
- ResultState result_state(scored_document_hits, /*query_terms=*/{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/2));
-
- EXPECT_THAT(
- result_state.GetNextPage(),
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
-
- EXPECT_THAT(
- result_state.GetNextPage(),
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2))));
-
- EXPECT_THAT(result_state.GetNextPage(),
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/1))));
-}
-
-TEST(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) {
- ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
- result_spec.mutable_snippet_spec()->set_max_window_bytes(5);
-
- SectionRestrictQueryTermsMap query_terms_map;
- query_terms_map.emplace("term1", std::unordered_set<std::string>());
-
- ResultState result_state(
- /*scored_document_hits=*/{}, query_terms_map,
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec);
-
- const SnippetContext& snippet_context = result_state.snippet_context();
-
- // Snippet context should be derived from the specs above.
- EXPECT_TRUE(snippet_context.query_terms.find("term1") !=
- snippet_context.query_terms.end());
- EXPECT_THAT(snippet_context.snippet_spec,
- EqualsProto(result_spec.snippet_spec()));
- EXPECT_THAT(snippet_context.match_type, Eq(TermMatchType::EXACT_ONLY));
-
- // The same copy can be fetched multiple times.
- const SnippetContext& snippet_context2 = result_state.snippet_context();
- EXPECT_TRUE(snippet_context2.query_terms.find("term1") !=
- snippet_context2.query_terms.end());
- EXPECT_THAT(snippet_context2.snippet_spec,
- EqualsProto(result_spec.snippet_spec()));
- EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY));
-}
-
-TEST(ResultStateTest, NoSnippetingShouldReturnNull) {
- ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
- // Setting num_to_snippet to 0 so that snippeting info won't be
- // stored.
- result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
- result_spec.mutable_snippet_spec()->set_max_window_bytes(5);
-
- SectionRestrictQueryTermsMap query_terms_map;
- query_terms_map.emplace("term1", std::unordered_set<std::string>());
-
- ResultState result_state(/*scored_document_hits=*/{}, query_terms_map,
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
- result_spec);
-
- const SnippetContext& snippet_context = result_state.snippet_context();
- EXPECT_THAT(snippet_context.query_terms, IsEmpty());
- EXPECT_THAT(
- snippet_context.snippet_spec,
- EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance()));
- EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN);
-}
-
-TEST(ResultStateTest, ShouldTruncateToNewSize) {
- std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
-
- // Creates a ResultState with 5 ScoredDocumentHits.
- ResultState result_state(scored_document_hits, /*query_terms=*/{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/5));
-
- result_state.TruncateHitsTo(/*new_size=*/3);
- // The best 3 are left.
- EXPECT_THAT(
- result_state.GetNextPage(),
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3))));
-}
-
-TEST(ResultStateTest, ShouldTruncateToZero) {
- std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
-
- // Creates a ResultState with 5 ScoredDocumentHits.
- ResultState result_state(scored_document_hits, /*query_terms=*/{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/5));
-
- result_state.TruncateHitsTo(/*new_size=*/0);
- EXPECT_THAT(result_state.GetNextPage(), IsEmpty());
-}
-
-TEST(ResultStateTest, ShouldNotTruncateToNegative) {
- std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
-
- // Creates a ResultState with 5 ScoredDocumentHits.
- ResultState result_state(scored_document_hits, /*query_terms=*/{},
- CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/5));
-
- result_state.TruncateHitsTo(/*new_size=*/-1);
- // Results are not affected.
- EXPECT_THAT(
- result_state.GetNextPage(),
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1))));
-}
-
-} // namespace
-} // namespace lib
-} // namespace icing
diff --git a/icing/result/snippet-retriever-test-jni-layer.cc b/icing/result/snippet-retriever-test-jni-layer.cc
new file mode 100644
index 0000000..707d9ee
--- /dev/null
+++ b/icing/result/snippet-retriever-test-jni-layer.cc
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include "gtest/gtest.h"
+#include "icing/testing/logging-event-listener.h"
+
+// Global variable used so that the test implementation can access the JNIEnv.
+JNIEnv* g_jenv = nullptr;
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_icing_jni_SnippetRetrieverJniTest_testsMain(JNIEnv* env, jclass ignored) {
+ g_jenv = env;
+
+ std::vector<char*> my_argv;
+ char arg[] = "jni-test-lib";
+ my_argv.push_back(arg);
+ int argc = 1;
+ char** argv = &(my_argv[0]);
+ testing::InitGoogleTest(&argc, argv);
+ testing::UnitTest::GetInstance()->listeners().Append(
+ new icing::lib::LoggingEventListener());
+ return RUN_ALL_TESTS() == 0;
+}
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index 09d0f7a..fcaba4c 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -15,6 +15,7 @@
#include "icing/result/snippet-retriever.h"
#include <algorithm>
+#include <iterator>
#include <memory>
#include <string>
#include <string_view>
@@ -25,8 +26,12 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
@@ -35,7 +40,9 @@
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -43,10 +50,166 @@ namespace lib {
namespace {
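+// Returns property_path with the value's index appended when the property
+// holds multiple values. Illustrative sketch of the intended behavior of
+// AddIndexToPath below (assuming property_util emits "[i]" index
+// expressions, as in the "bcc[1]" example later in this file):
+// AddIndexToPath(/*values_size=*/3, /*index=*/1, "recipient.address")
+// -> "recipient.address[1]"
+// AddIndexToPath(/*values_size=*/1, /*index=*/0, "subject") -> "subject"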
+inline std::string AddIndexToPath(int values_size, int index,
+ const std::string& property_path) {
+ if (values_size == 1) {
+ return property_path;
+ }
+ return absl_ports::StrCat(
+ property_path, property_util::ConvertToPropertyExprIndexStr(index));
+}
+
+// Returns a string of the normalized text of the input Token. Normalization
+// is applied based on the Token's type.
+std::string NormalizeToken(const Normalizer& normalizer, const Token& token) {
+ switch (token.type) {
+ case Token::Type::RFC822_NAME:
+ [[fallthrough]];
+ case Token::Type::RFC822_COMMENT:
+ [[fallthrough]];
+ case Token::Type::RFC822_LOCAL_ADDRESS:
+ [[fallthrough]];
+ case Token::Type::RFC822_HOST_ADDRESS:
+ [[fallthrough]];
+ case Token::Type::RFC822_ADDRESS:
+ [[fallthrough]];
+ case Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL:
+ [[fallthrough]];
+ case Token::Type::RFC822_ADDRESS_COMPONENT_HOST:
+ [[fallthrough]];
+ case Token::Type::RFC822_TOKEN:
+ [[fallthrough]];
+ case Token::Type::URL_SCHEME:
+ [[fallthrough]];
+ case Token::Type::URL_USERNAME:
+ [[fallthrough]];
+ case Token::Type::URL_PASSWORD:
+ [[fallthrough]];
+ case Token::Type::URL_HOST_COMMON_PART:
+ [[fallthrough]];
+ case Token::Type::URL_HOST_SIGNIFICANT_PART:
+ [[fallthrough]];
+ case Token::Type::URL_PORT:
+ [[fallthrough]];
+ case Token::Type::URL_PATH_PART:
+ [[fallthrough]];
+ case Token::Type::URL_QUERY:
+ [[fallthrough]];
+ case Token::Type::URL_REF:
+ [[fallthrough]];
+ case Token::Type::URL_SUFFIX:
+ [[fallthrough]];
+ case Token::Type::URL_SUFFIX_INNERMOST:
+ [[fallthrough]];
+ case Token::Type::REGULAR:
+ return normalizer.NormalizeTerm(token.text);
+ case Token::Type::VERBATIM:
+ return std::string(token.text);
+ case Token::Type::QUERY_EXCLUSION:
+ [[fallthrough]];
+ case Token::Type::QUERY_LEFT_PARENTHESES:
+ [[fallthrough]];
+ case Token::Type::QUERY_RIGHT_PARENTHESES:
+ [[fallthrough]];
+ case Token::Type::QUERY_OR:
+ [[fallthrough]];
+ case Token::Type::QUERY_PROPERTY:
+ [[fallthrough]];
+ case Token::Type::INVALID:
+ ICING_LOG(WARNING) << "Unable to normalize token of type: "
+ << static_cast<int>(token.type);
+ return std::string(token.text);
+ }
+}
+
+// Returns a CharacterIterator into token's text, positioned one past the
+// last character that matches the query term.
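+// For example (illustrative): for a REGULAR token with text "foobar" and a
+// matched query term "foo", the returned iterator would sit at index 3 in
+// every encoding, one past the last matched character.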
+CharacterIterator FindMatchEnd(const Normalizer& normalizer, const Token& token,
+ const std::string& match_query_term) {
+ switch (token.type) {
+ case Token::Type::VERBATIM: {
+ // VERBATIM tokens are not normalized. This means the non-normalized
+ // matched query term must be either equal to or a prefix of the token's
+ // text. Therefore, the match must end at the end of the matched query
+ // term.
+ CharacterIterator verbatim_match_end =
+ CharacterIterator(token.text, 0, 0, 0);
+ verbatim_match_end.AdvanceToUtf8(match_query_term.length());
+ return verbatim_match_end;
+ }
+ case Token::Type::QUERY_EXCLUSION:
+ [[fallthrough]];
+ case Token::Type::QUERY_LEFT_PARENTHESES:
+ [[fallthrough]];
+ case Token::Type::QUERY_RIGHT_PARENTHESES:
+ [[fallthrough]];
+ case Token::Type::QUERY_OR:
+ [[fallthrough]];
+ case Token::Type::QUERY_PROPERTY:
+ [[fallthrough]];
+ case Token::Type::INVALID:
+ ICING_LOG(WARNING)
+ << "Unexpected Token type " << static_cast<int>(token.type)
+ << " found when finding match end of query term and token.";
+ [[fallthrough]];
+ case Token::Type::RFC822_NAME:
+ [[fallthrough]];
+ case Token::Type::RFC822_COMMENT:
+ [[fallthrough]];
+ case Token::Type::RFC822_LOCAL_ADDRESS:
+ [[fallthrough]];
+ case Token::Type::RFC822_HOST_ADDRESS:
+ [[fallthrough]];
+ case Token::Type::RFC822_ADDRESS:
+ [[fallthrough]];
+ case Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL:
+ [[fallthrough]];
+ case Token::Type::RFC822_ADDRESS_COMPONENT_HOST:
+ [[fallthrough]];
+ case Token::Type::RFC822_TOKEN:
+ [[fallthrough]];
+ case Token::Type::URL_SCHEME:
+ [[fallthrough]];
+ case Token::Type::URL_USERNAME:
+ [[fallthrough]];
+ case Token::Type::URL_PASSWORD:
+ [[fallthrough]];
+ case Token::Type::URL_HOST_COMMON_PART:
+ [[fallthrough]];
+ case Token::Type::URL_HOST_SIGNIFICANT_PART:
+ [[fallthrough]];
+ case Token::Type::URL_PORT:
+ [[fallthrough]];
+ case Token::Type::URL_QUERY:
+ [[fallthrough]];
+ case Token::Type::URL_PATH_PART:
+ [[fallthrough]];
+ case Token::Type::URL_REF:
+ [[fallthrough]];
+ case Token::Type::URL_SUFFIX:
+ [[fallthrough]];
+ case Token::Type::URL_SUFFIX_INNERMOST:
+ [[fallthrough]];
+ case Token::Type::REGULAR:
+ return normalizer.FindNormalizedMatchEndPosition(token.text,
+ match_query_term);
+ }
+}
+
class TokenMatcher {
public:
virtual ~TokenMatcher() = default;
- virtual bool Matches(Token token) const = 0;
+
+ // Returns a CharacterIterator pointing just past the end of the substring in
+ // token.text that matches a query term. Note that the utf* indices will be
+ // in relation to token.text's start.
+ //
+ // If there is no match, then it will construct a CharacterIterator with all
+ // of its indices set to -1.
+ //
+ // Ex. With an exact matcher, query terms=["foo","bar"] and token.text="bar",
+ // Matches will return a CharacterIterator(u8:3, u16:3, u32:3).
+ virtual CharacterIterator Matches(Token token) const = 0;
};
class TokenMatcherExact : public TokenMatcher {
@@ -59,10 +222,18 @@ class TokenMatcherExact : public TokenMatcher {
restricted_query_terms_(restricted_query_terms),
normalizer_(normalizer) {}
- bool Matches(Token token) const override {
- std::string s = normalizer_.NormalizeTerm(token.text);
- return (unrestricted_query_terms_.count(s) > 0) ||
- (restricted_query_terms_.count(s) > 0);
+ CharacterIterator Matches(Token token) const override {
+ std::string s = NormalizeToken(normalizer_, token);
+ // Look the normalized token up in each set separately; comparing an
+ // iterator against the end() of a container it doesn't belong to is
+ // undefined behavior.
+ auto itr = unrestricted_query_terms_.find(s);
+ if (itr != unrestricted_query_terms_.end()) {
+ return FindMatchEnd(normalizer_, token, *itr);
+ }
+ itr = restricted_query_terms_.find(s);
+ if (itr != restricted_query_terms_.end()) {
+ return FindMatchEnd(normalizer_, token, *itr);
+ }
+ return CharacterIterator(token.text, -1, -1, -1);
}
private:
@@ -81,22 +252,21 @@ class TokenMatcherPrefix : public TokenMatcher {
restricted_query_terms_(restricted_query_terms),
normalizer_(normalizer) {}
- bool Matches(Token token) const override {
- std::string s = normalizer_.NormalizeTerm(token.text);
- if (std::any_of(unrestricted_query_terms_.begin(),
- unrestricted_query_terms_.end(),
- [&s](const std::string& term) {
- return term.length() <= s.length() &&
- s.compare(0, term.length(), term) == 0;
- })) {
- return true;
+ CharacterIterator Matches(Token token) const override {
+ std::string s = NormalizeToken(normalizer_, token);
+ for (const std::string& query_term : unrestricted_query_terms_) {
+ if (query_term.length() <= s.length() &&
+ s.compare(0, query_term.length(), query_term) == 0) {
+ return FindMatchEnd(normalizer_, token, query_term);
+ }
}
- return std::any_of(restricted_query_terms_.begin(),
- restricted_query_terms_.end(),
- [&s](const std::string& term) {
- return term.length() <= s.length() &&
- s.compare(0, term.length(), term) == 0;
- });
+ for (const std::string& query_term : restricted_query_terms_) {
+ if (query_term.length() <= s.length() &&
+ s.compare(0, query_term.length(), query_term) == 0) {
+ return FindMatchEnd(normalizer_, token, query_term);
+ }
+ }
+ return CharacterIterator(token.text, -1, -1, -1);
}
private:
@@ -124,115 +294,165 @@ libtextclassifier3::StatusOr<std::unique_ptr<TokenMatcher>> CreateTokenMatcher(
}
}
-// Returns true if token matches any of the terms in query terms according to
-// the provided match type.
+// Finds the start position of a valid token that is after
+// window_start_min_exclusive_utf32.
//
// Returns:
// the position of the window start if successful
// INTERNAL_ERROR - if a tokenizer error is encountered
-libtextclassifier3::StatusOr<int> DetermineWindowStart(
+libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowStart(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
- int window_start_min = (match_mid - snippet_spec.max_window_bytes() / 2) - 1;
- if (window_start_min < 0) {
- return 0;
- }
- if (!iterator->ResetToTokenAfter(window_start_min)) {
+ std::string_view value, int window_start_min_exclusive_utf32,
+ Tokenizer::Iterator* iterator) {
+ if (!iterator->ResetToTokenStartingAfter(window_start_min_exclusive_utf32)) {
return absl_ports::InternalError(
"Couldn't reset tokenizer to determine snippet window!");
}
- return iterator->GetToken().text.data() - value.data();
+ return iterator->CalculateTokenStart();
}
// Increments window_end_exclusive so long as the character at the position
// of window_end_exclusive is punctuation and does not exceed
-// window_end_max_exclusive.
-int IncludeTrailingPunctuation(std::string_view value, int window_end_exclusive,
- int window_end_max_exclusive) {
- while (window_end_exclusive < window_end_max_exclusive) {
+// window_end_max_exclusive_utf32.
+CharacterIterator IncludeTrailingPunctuation(
+ std::string_view value, CharacterIterator window_end_exclusive,
+ int window_end_max_exclusive_utf32) {
+ size_t max_search_index = value.length() - 1;
+ while (window_end_exclusive.utf8_index() <= max_search_index &&
+ window_end_exclusive.utf32_index() < window_end_max_exclusive_utf32) {
int char_len = 0;
- if (!i18n_utils::IsPunctuationAt(value, window_end_exclusive, &char_len)) {
- break;
- }
- if (window_end_exclusive + char_len > window_end_max_exclusive) {
- // This is punctuation, but it goes beyond the window end max. Don't
- // include it.
+ if (!i18n_utils::IsPunctuationAt(value, window_end_exclusive.utf8_index(),
+ &char_len)) {
break;
}
// Expand window by char_len and check the next character.
- window_end_exclusive += char_len;
+ window_end_exclusive.AdvanceToUtf32(window_end_exclusive.utf32_index() + 1);
}
return window_end_exclusive;
}
+// Finds the end position of a valid token that is before the
+// window_end_max_exclusive_utf32.
+//
// Returns:
// the position of the window end if successful
// INTERNAL_ERROR - if a tokenizer error is encountered
-libtextclassifier3::StatusOr<int> DetermineWindowEnd(
+libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowEnd(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
- int window_end_max_exclusive =
- match_mid + snippet_spec.max_window_bytes() / 2;
- if (window_end_max_exclusive >= value.length()) {
- return value.length();
- }
- if (!iterator->ResetToTokenBefore(window_end_max_exclusive)) {
+ std::string_view value, int window_end_max_exclusive_utf32,
+ Tokenizer::Iterator* iterator) {
+ if (!iterator->ResetToTokenEndingBefore(window_end_max_exclusive_utf32)) {
return absl_ports::InternalError(
"Couldn't reset tokenizer to determine snippet window!");
}
- int window_end_exclusive = iterator->GetToken().text.data() - value.data() +
- iterator->GetToken().text.length();
- return IncludeTrailingPunctuation(value, window_end_exclusive,
- window_end_max_exclusive);
+ ICING_ASSIGN_OR_RETURN(CharacterIterator end_exclusive,
+ iterator->CalculateTokenEndExclusive());
+ return IncludeTrailingPunctuation(value, end_exclusive,
+ window_end_max_exclusive_utf32);
}
struct SectionData {
std::string_view section_name;
std::string_view section_subcontent;
- // Identifies which subsection of the section content, section_subcontent has
- // come from.
- // Ex. "recipient.address" :
- // ["foo@google.com", "bar@google.com", "baz@google.com"]
- // The subcontent_index of "bar@google.com" is 1.
- int subcontent_index;
};
+// Creates a snippet match proto for the match between start_itr and end_itr
+// in the section content pointed to by the iterator.
+// Returns:
+// a SnippetMatchProto for the match if successful
+// INTERNAL_ERROR - if a tokenizer error is encountered and iterator is left
+// in an invalid state
+// ABORTED_ERROR - if an invalid utf-8 sequence is encountered
libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- const SectionData& value, Tokenizer::Iterator* iterator) {
+ const SectionData& value, Tokenizer::Iterator* iterator,
+ const CharacterIterator& start_itr, const CharacterIterator& end_itr) {
SnippetMatchProto snippet_match;
- snippet_match.set_values_index(value.subcontent_index);
-
- Token match = iterator->GetToken();
- int match_pos = match.text.data() - value.section_subcontent.data();
- int match_mid = match_pos + match.text.length() / 2;
-
- snippet_match.set_exact_match_position(match_pos);
- snippet_match.set_exact_match_bytes(match.text.length());
-
- if (snippet_spec.max_window_bytes() > match.text.length()) {
+ // When finding boundaries, we have a few cases:
+ //
+ // Case 1:
+ // If we have an odd length match and an odd length window, the window surrounds
+ // the match perfectly.
+ // match = "bar" in "foo bar baz"
+ // window = |---|
+ //
+ // Case 2:
+ // If we have an even length match with an even length window, the window
+ // surrounds the match perfectly.
+ // match = "baar" in "foo baar baz"
+ // window = |----|
+ //
+ // Case 3:
+ // If we have an odd length match with an even length window, we allocate
+ // the extra window character to the beginning.
+ // match = "bar" in "foo bar baz"
+ // window = |----|
+ //
+ // Case 4:
+ // If we have an even length match with an odd length window, we allocate
+ // the extra window character to the end.
+ // match = "baar" in "foo baar baz"
+ // window = |-----|
+ //
+ // We have to do the +1/-1 below to get the math to match up.
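+ //
+ // Worked example (illustrative): for value "foo bar baz" with match "bar",
+ // match_pos_utf32=4, match_len_utf32=3, match_mid_utf32=5. With
+ // max_window_utf32_length()=4 (Case 3), window_start_min_exclusive_utf32 =
+ // (5 - 2) - 1 = 2 and window_end_max_exclusive_utf32 = 5 + 2 = 7, bounding
+ // the window to utf32 indices 3..6 (" bar") before it is snapped to token
+ // boundaries below.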
+ int match_pos_utf32 = start_itr.utf32_index();
+ int match_len_utf32 = end_itr.utf32_index() - match_pos_utf32;
+ int match_mid_utf32 = match_pos_utf32 + match_len_utf32 / 2;
+ int window_start_min_exclusive_utf32 =
+ (match_mid_utf32 - snippet_spec.max_window_utf32_length() / 2) - 1;
+ int window_end_max_exclusive_utf32 =
+ match_mid_utf32 + (snippet_spec.max_window_utf32_length() + 1) / 2;
+
+ snippet_match.set_exact_match_byte_position(start_itr.utf8_index());
+ snippet_match.set_exact_match_utf16_position(start_itr.utf16_index());
+ snippet_match.set_exact_match_byte_length(end_itr.utf8_index() -
+ start_itr.utf8_index());
+ snippet_match.set_exact_match_utf16_length(end_itr.utf16_index() -
+ start_itr.utf16_index());
+
+ // Only include a window if it will at least contain the matched text.
+ // Otherwise, it would just be an empty string anyway.
+ if (snippet_spec.max_window_utf32_length() >= match_len_utf32) {
// Find the beginning of the window.
ICING_ASSIGN_OR_RETURN(
- int window_start,
- DetermineWindowStart(snippet_spec, value.section_subcontent, match_mid,
- iterator));
- snippet_match.set_window_position(window_start);
+ CharacterIterator window_start,
+ DetermineWindowStart(snippet_spec, value.section_subcontent,
+ window_start_min_exclusive_utf32, iterator));
+
+ // Check: did we get fewer characters than we requested? If so, then add
+ // the leftover space on to the window end.
+ int extra_window_space =
+ window_start.utf32_index() - 1 - window_start_min_exclusive_utf32;
+ window_end_max_exclusive_utf32 += extra_window_space;
// Find the end of the window.
ICING_ASSIGN_OR_RETURN(
- int window_end_exclusive,
- DetermineWindowEnd(snippet_spec, value.section_subcontent, match_mid,
- iterator));
- snippet_match.set_window_bytes(window_end_exclusive - window_start);
-
- // DetermineWindowStart/End may change the position of the iterator. So,
- // reset the iterator back to the original position.
- bool success = (match_pos > 0) ? iterator->ResetToTokenAfter(match_pos - 1)
- : iterator->ResetToStart();
- if (!success) {
- return absl_ports::InternalError(
- "Couldn't reset tokenizer to determine snippet window!");
+ CharacterIterator window_end,
+ DetermineWindowEnd(snippet_spec, value.section_subcontent,
+ window_end_max_exclusive_utf32, iterator));
+
+ // Check one more time. Did we get fewer characters than we requested? If
+ // so, then see if we can push the start back again.
+ extra_window_space =
+ window_end_max_exclusive_utf32 - window_end.utf32_index();
+ if (extra_window_space > 0) {
+ window_start_min_exclusive_utf32 =
+ window_start.utf32_index() - 1 - extra_window_space;
+ ICING_ASSIGN_OR_RETURN(
+ window_start,
+ DetermineWindowStart(snippet_spec, value.section_subcontent,
+ window_start_min_exclusive_utf32, iterator));
}
+
+ snippet_match.set_window_byte_position(window_start.utf8_index());
+ snippet_match.set_window_utf16_position(window_start.utf16_index());
+ snippet_match.set_window_byte_length(window_end.utf8_index() -
+ window_start.utf8_index());
+ snippet_match.set_window_utf16_length(window_end.utf16_index() -
+ window_start.utf16_index());
+
+ // DetermineWindowStart/End may change the position of the iterator, but it
+ // will be reset once the entire batch of tokens is checked.
}
return snippet_match;
@@ -243,33 +463,184 @@ struct MatchOptions {
int max_matches_remaining;
};
-libtextclassifier3::StatusOr<SnippetProto::EntryProto> RetrieveMatches(
- const TokenMatcher* matcher, const MatchOptions& match_options,
- const SectionData& value, const Tokenizer* tokenizer) {
- SnippetProto::EntryProto snippet_entry;
- snippet_entry.set_property_name(std::string(value.section_name));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
- tokenizer->Tokenize(value.section_subcontent));
- while (iterator->Advance()) {
- if (snippet_entry.snippet_matches_size() >=
- match_options.max_matches_remaining) {
- break;
+// Retrieves snippets in the string values of current_property.
+// Tokenizer is provided to tokenize string content and matcher is provided to
+// indicate when a token matches content in the query.
+//
+// current_property is the property with the string values to snippet.
+// property_path is the path in the document to current_property.
+//
+// MatchOptions holds the snippet spec and number of desired matches remaining.
+// Each call to GetEntriesFromProperty will decrement max_matches_remaining
+// by the number of matches that it adds to snippet_proto.
+//
+// The SnippetEntries found for matched content will be added to snippet_proto.
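+//
+// For example (illustrative): if property_path="body" holds two string values
+// and matches are found only in the second one, a single EntryProto with
+// property_name "body[1]" is added to snippet_proto.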
+void GetEntriesFromProperty(const PropertyProto* current_property,
+ const std::string& property_path,
+ const TokenMatcher* matcher,
+ const Tokenizer* tokenizer,
+ MatchOptions* match_options,
+ SnippetProto* snippet_proto) {
+ // Snippet each of this property's string values.
+ for (int i = 0; i < current_property->string_values_size(); ++i) {
+ SnippetProto::EntryProto snippet_entry;
+ snippet_entry.set_property_name(AddIndexToPath(
+ current_property->string_values_size(), /*index=*/i, property_path));
+ std::string_view value = current_property->string_values(i);
+ std::unique_ptr<Tokenizer::Iterator> iterator =
+ tokenizer->Tokenize(value).ValueOrDie();
+ // All iterators move through positions sequentially, and constructing an
+ // iterator resets it to the beginning of the string. This means that, for t
+ // tokens in a string of n chars, each MoveToUtf8 call from the beginning of
+ // the string costs O(n/2) on average, whereas calling MoveToUtf8 from the
+ // token immediately prior to the desired one costs O(n/t). Constructing the
+ // iterators once, outside of the while-loop, keeps the total cost at
+ // O(t * (n/t)) = O(n) rather than O(t * n / 2).
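+ // For instance (illustrative numbers): with n=10000 and t=100, sequential
+ // moves cost ~100 characters each (~10000 total), while restarting from
+ // index 0 for every token would average ~5000 each (~500000 total).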
+ CharacterIterator start_itr(value);
+ CharacterIterator end_itr(value);
+ CharacterIterator reset_itr(value);
+ bool encountered_error = false;
+ while (iterator->Advance()) {
+ std::vector<Token> batch_tokens = iterator->GetTokens();
+ if (batch_tokens.empty()) {
+ continue;
+ }
+
+ bool needs_reset = false;
+ reset_itr.MoveToUtf8(batch_tokens.at(0).text.begin() - value.begin());
+ start_itr = reset_itr;
+ end_itr = start_itr;
+ // Use a distinct index so we don't shadow the string_values index above.
+ for (int j = 0; j < batch_tokens.size(); ++j) {
+ const Token& token = batch_tokens.at(j);
+ CharacterIterator submatch_end = matcher->Matches(token);
+ // If the token matched a query term, then submatch_end will point to an
+ // actual position within token.text.
+ if (submatch_end.utf8_index() == -1) {
+ continue;
+ }
+ // Because snippet matching may move the iterator around, we saved a reset
+ // iterator above so that we can restore the initial iterator state and
+ // continue advancing in order in the next round.
+ if (!start_itr.MoveToUtf8(token.text.begin() - value.begin())) {
+ encountered_error = true;
+ break;
+ }
+ if (!end_itr.MoveToUtf8(token.text.end() - value.begin())) {
+ encountered_error = true;
+ break;
+ }
+ SectionData data = {property_path, value};
+ auto match_or = RetrieveMatch(match_options->snippet_spec, data,
+ iterator.get(), start_itr, end_itr);
+ if (!match_or.ok()) {
+ if (absl_ports::IsAborted(match_or.status())) {
+ // Only an ABORTED error. We can't get this match, but we might be
+ // able to retrieve others. Just continue.
+ continue;
+ } else {
+ encountered_error = true;
+ break;
+ }
+ }
+ SnippetMatchProto match = std::move(match_or).ValueOrDie();
+ if (match.window_byte_length() > 0) {
+ needs_reset = true;
+ }
+ // submatch_end refers to a position *within* token.text.
+ // This, conveniently enough, means that the index that submatch_end
+ // points to is the length of the submatch (because the submatch
+ // starts at 0 in token.text).
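+ // E.g. (illustrative): for token.text="barcelona" with a prefix match on
+ // query term "bar", submatch_end.utf8_index() == 3, which is exactly the
+ // submatch's length in bytes.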
+ match.set_submatch_byte_length(submatch_end.utf8_index());
+ match.set_submatch_utf16_length(submatch_end.utf16_index());
+ // Add the values for the submatch.
+ snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+
+ if (--match_options->max_matches_remaining <= 0) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
+ return;
+ }
+ }
+
+ if (encountered_error) {
+ break;
+ }
+
+ // RetrieveMatch may call DetermineWindowStart/End if windowing is
+ // requested, which may change the position of the iterator. So, reset the
+ // iterator back to the original position. The first token of the token
+ // batch will be the token to reset to.
+ if (needs_reset) {
+ if (reset_itr.utf8_index() > 0) {
+ encountered_error =
+ !iterator->ResetToTokenStartingAfter(reset_itr.utf32_index() - 1);
+ } else {
+ encountered_error = !iterator->ResetToStart();
+ }
+ }
+ if (encountered_error) {
+ break;
+ }
}
- Token token = iterator->GetToken();
- if (matcher->Matches(token)) {
- // If there was an error while retrieving the match, the tokenizer
- // iterator is probably in an invalid state. There's nothing we can do
- // here, so just return.
- ICING_ASSIGN_OR_RETURN(
- SnippetMatchProto match,
- RetrieveMatch(match_options.snippet_spec, value, iterator.get()));
- snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+ if (!snippet_entry.snippet_matches().empty()) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
}
}
- if (snippet_entry.snippet_matches().empty()) {
- return absl_ports::NotFoundError("No matches found in value!");
+}
+
+// Retrieves snippets in document from content at section_path.
+// Tokenizer is provided to tokenize string content and matcher is provided to
+// indicate when a token matches content in the query.
+//
+// section_path_index identifies the element of section_path that document
+// currently corresponds to.
+// current_path is equivalent to the first section_path_index values in
+// section_path, but with value indices present.
+//
+// For example, suppose that a hit appeared somewhere in the "bcc.emailAddress".
+// The arguments for RetrieveSnippetForSection might be
+// {section_path=["bcc", "emailAddress"], section_path_index=0, current_path=""}
+// on the first call and
+// {section_path=["bcc", "emailAddress"], section_path_index=1,
+// current_path="bcc[1]"} on the second recursive call.
+//
+// MatchOptions holds the snippet spec and number of desired matches remaining.
+// Each call to RetrieveSnippetForSection will decrement max_matches_remaining
+// by the number of matches that it adds to snippet_proto.
+//
+// The SnippetEntries found for matched content will be added to snippet_proto.
+void RetrieveSnippetForSection(
+ const DocumentProto& document, const TokenMatcher* matcher,
+ const Tokenizer* tokenizer,
+ const std::vector<std::string_view>& section_path, int section_path_index,
+ const std::string& current_path, MatchOptions* match_options,
+ SnippetProto* snippet_proto) {
+ std::string_view next_property_name = section_path.at(section_path_index);
+ const PropertyProto* current_property =
+ property_util::GetPropertyProto(document, next_property_name);
+ if (current_property == nullptr) {
+ ICING_VLOG(1) << "No property " << next_property_name << " found at path "
+ << current_path;
+ return;
+ }
+ std::string property_path = property_util::ConcatenatePropertyPathExpr(
+ current_path, next_property_name);
+ if (section_path_index == section_path.size() - 1) {
+ // We're at the end. Let's check our values.
+ GetEntriesFromProperty(current_property, property_path, matcher, tokenizer,
+ match_options, snippet_proto);
+ } else {
+ // Still got more to go. Let's look through our subdocuments.
+ std::vector<SnippetProto::EntryProto> entries;
+ for (int i = 0; i < current_property->document_values_size(); ++i) {
+ std::string new_path = AddIndexToPath(
+ current_property->document_values_size(), /*index=*/i, property_path);
+ RetrieveSnippetForSection(current_property->document_values(i), matcher,
+ tokenizer, section_path, section_path_index + 1,
+ new_path, match_options, snippet_proto);
+ if (match_options->max_matches_remaining <= 0) {
+ break;
+ }
+ }
}
- return snippet_entry;
}
} // namespace
@@ -300,9 +671,13 @@ SnippetProto SnippetRetriever::RetrieveSnippet(
const std::unordered_set<std::string>& unrestricted_set =
(itr != query_terms.end()) ? itr->second : empty_set;
while (section_id_mask != kSectionIdMaskNone) {
- SectionId section_id = __builtin_ctz(section_id_mask);
+ SectionId section_id = __builtin_ctzll(section_id_mask);
// Remove this section from the mask.
- section_id_mask &= ~(1u << section_id);
+ section_id_mask &= ~(UINT64_C(1) << section_id);
+
+ MatchOptions match_options = {snippet_spec};
+ match_options.max_matches_remaining =
+ snippet_spec.num_matches_per_property();
// Determine the section name and match type.
auto section_metadata_or =
@@ -311,7 +686,9 @@ SnippetProto SnippetRetriever::RetrieveSnippet(
continue;
}
const SectionMetadata* metadata = section_metadata_or.ValueOrDie();
- MatchOptions match_options = {snippet_spec};
+ std::vector<std::string_view> section_path =
+ property_util::SplitPropertyPathExpr(metadata->path);
+
// Match type must be as restrictive as possible. Prefix matches for a
// snippet should only be included if both the query is Prefix and the
// section has prefixes enabled.
@@ -330,37 +707,18 @@ SnippetProto SnippetRetriever::RetrieveSnippet(
if (!matcher_or.ok()) {
continue;
}
- match_options.max_matches_remaining =
- snippet_spec.num_matches_per_property();
+ std::unique_ptr<TokenMatcher> matcher = std::move(matcher_or).ValueOrDie();
- // Retrieve values and snippet them.
- auto values_or = schema_store_.GetSectionContent(document, metadata->path);
- if (!values_or.ok()) {
- continue;
- }
auto tokenizer_or = tokenizer_factory::CreateIndexingTokenizer(
metadata->tokenizer, &language_segmenter_);
if (!tokenizer_or.ok()) {
// If we couldn't create the tokenizer properly, just skip this section.
continue;
}
- std::vector<std::string> values = values_or.ValueOrDie();
- for (int value_index = 0; value_index < values.size(); ++value_index) {
- if (match_options.max_matches_remaining <= 0) {
- break;
- }
- SectionData value = {metadata->path, values.at(value_index), value_index};
- auto entry_or =
- RetrieveMatches(matcher_or.ValueOrDie().get(), match_options, value,
- tokenizer_or.ValueOrDie().get());
-
- // Drop any entries that encountered errors or didn't find any matches.
- if (entry_or.ok()) {
- match_options.max_matches_remaining -=
- entry_or.ValueOrDie().snippet_matches_size();
- snippet_proto.mutable_entries()->Add(std::move(entry_or).ValueOrDie());
- }
- }
+ std::unique_ptr<Tokenizer> tokenizer = std::move(tokenizer_or).ValueOrDie();
+ RetrieveSnippetForSection(
+ document, matcher.get(), tokenizer.get(), section_path,
+ /*section_path_index=*/0, "", &match_options, &snippet_proto);
}
return snippet_proto;
}
diff --git a/icing/result/snippet-retriever_benchmark.cc b/icing/result/snippet-retriever_benchmark.cc
new file mode 100644
index 0000000..e574325
--- /dev/null
+++ b/icing/result/snippet-retriever_benchmark.cc
@@ -0,0 +1,333 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "third_party/absl/flags/flag.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/result/snippet-retriever.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "unicode/uloc.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/result:snippet-retriever_benchmark
+//
+// $ blaze-bin/icing/result/snippet-retriever_benchmark
+// --benchmark_filter=all
+//
+// Run on an Android device:
+// Make target //icing/tokenization:language-segmenter depend on
+// //third_party/icu
+//
+// Make target //icing/transform:normalizer depend on
+// //third_party/icu
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/result:snippet-retriever_benchmark
+//
+// $ adb push blaze-bin/icing/result/snippet-retriever_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/snippet-retriever_benchmark
+// --benchmark_filter=all --adb
+
+// Flag to tell the benchmark that it'll be run on an Android device via adb;
+// the benchmark will set up data files accordingly.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::SizeIs;
+
+void BM_SnippetOneProperty(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ const std::string base_dir = GetTestTempDir() + "/snippet_retriever_benchmark";
+ const std::string schema_dir = base_dir + "/schema";
+ Filesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+ if (!filesystem.CreateDirectoryRecursively(schema_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
+ std::unique_ptr<Normalizer> normalizer =
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max())
+ .ValueOrDie();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type1").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ auto snippet_retriever =
+ SnippetRetriever::Create(schema_store.get(), language_segmenter.get(),
+ normalizer.get())
+ .ValueOrDie();
+
+ int num_matches = state.range(0);
+ int total_terms = state.range(1);
+
+ std::default_random_engine random;
+ std::vector<std::string> language =
+ CreateLanguages(/*language_size=*/1000, &random);
+ std::uniform_int_distribution<size_t> uniform(0u, language.size() - 1);
+ std::uniform_real_distribution<double> uniform_double(0.0, 1.0);
+
+ std::string text;
+ int num_actual_matches = 0;
+ double match_chance;
+ while (total_terms-- > 0) {
+ std::string term;
+ match_chance = static_cast<double>(num_matches) / total_terms;
+ if (uniform_double(random) <= match_chance) {
+ --num_matches;
+ ++num_actual_matches;
+ term = "foo";
+ } else {
+ term = language.at(uniform(random));
+ }
+ absl_ports::StrAppend(&text, " ", term);
+ }
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("type1")
+ .AddStringProperty("prop1", text)
+ .Build();
+ SectionRestrictQueryTermsMap query_terms = {{"", {"foo"}}};
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+ snippet_spec.set_num_to_snippet(100000);
+ snippet_spec.set_num_matches_per_property(100000);
+ snippet_spec.set_max_window_utf32_length(64);
+
+ SectionIdMask section_id_mask = 0x01;
+ SnippetProto snippet_proto;
+ for (auto _ : state) {
+ snippet_proto = snippet_retriever->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec, document,
+ section_id_mask);
+ ASSERT_THAT(snippet_proto.entries(), SizeIs(1));
+ ASSERT_THAT(snippet_proto.entries(0).snippet_matches(),
+ SizeIs(num_actual_matches));
+ }
+
+ // Destroy the schema store before the whole directory is removed because it
+ // persists data in its destructor.
+ schema_store.reset();
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+}
+BENCHMARK(BM_SnippetOneProperty)
+ // Arguments: num_matches, total_terms
+ ->ArgPair(1, 1)
+ ->ArgPair(1, 16) // single match
+ ->ArgPair(2, 16) // ~10% matches
+ ->ArgPair(3, 16) // ~20% matches
+ ->ArgPair(8, 16) // 50% matches
+ ->ArgPair(16, 16) // 100% matches
+ ->ArgPair(1, 128) // single match
+ ->ArgPair(13, 128) // ~10% matches
+ ->ArgPair(26, 128) // ~20% matches
+ ->ArgPair(64, 128) // 50% matches
+ ->ArgPair(128, 128) // 100% matches
+ ->ArgPair(1, 512) // single match
+ ->ArgPair(51, 512) // ~10% matches
+ ->ArgPair(102, 512) // ~20% matches
+ ->ArgPair(256, 512) // 50% matches
+ ->ArgPair(512, 512) // 100% matches
+ ->ArgPair(1, 1024) // single match
+ ->ArgPair(102, 1024) // ~10% matches
+ ->ArgPair(205, 1024) // ~20% matches
+ ->ArgPair(512, 1024) // 50% matches
+ ->ArgPair(1024, 1024) // 100% matches
+ ->ArgPair(1, 4096) // single match
+ ->ArgPair(410, 4096) // ~10% matches
+ ->ArgPair(819, 4096) // ~20% matches
+ ->ArgPair(2048, 4096) // 50% matches
+ ->ArgPair(4096, 4096) // 100% matches
+ ->ArgPair(1, 16384) // single match
+ ->ArgPair(1638, 16384) // ~10% matches
+ ->ArgPair(3277, 16384) // ~20% matches
+ ->ArgPair(8192, 16384) // 50% matches
+ ->ArgPair(16384, 16384); // 100% matches
+
+void BM_SnippetRfcOneProperty(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ const std::string base_dir = GetTestTempDir() + "/snippet_retriever_benchmark";
+ const std::string schema_dir = base_dir + "/schema";
+ Filesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+ if (!filesystem.CreateDirectoryRecursively(schema_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
+ std::unique_ptr<Normalizer> normalizer =
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max())
+ .ValueOrDie();
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type1").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ auto snippet_retriever =
+ SnippetRetriever::Create(schema_store.get(), language_segmenter.get(),
+ normalizer.get())
+ .ValueOrDie();
+
+ int num_matches = state.range(0);
+ int total_terms = state.range(1);
+
+ std::default_random_engine random;
+ std::vector<std::string> language =
+ CreateLanguages(/*language_size=*/1000, &random);
+ std::uniform_int_distribution<size_t> uniform(0u, language.size() - 1);
+ std::uniform_real_distribution<double> uniform_double(0.0, 1.0);
+
+ std::string text;
+ int num_actual_matches = 0;
+ double match_chance;
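+ // Same corpus construction as BM_SnippetOneProperty, except each term is an
+ // email-like token ("<term>@google.com") and terms are joined with commas,
+ // approximating an address-list property value.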
+ while (total_terms-- > 0) {
+ std::string term;
+ match_chance = static_cast<double>(num_matches) / total_terms;
+ if (uniform_double(random) <= match_chance) {
+ --num_matches;
+ ++num_actual_matches;
+ term = "foo@google.com";
+ } else {
+ term = absl_ports::StrCat(language.at(uniform(random)), "@google.com");
+ }
+ absl_ports::StrAppend(&text, ",", term);
+ }
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema("type1")
+ .AddStringProperty("prop1", text)
+ .Build();
+ SectionRestrictQueryTermsMap query_terms = {{"", {"foo"}}};
+ ResultSpecProto::SnippetSpecProto snippet_spec;
+ snippet_spec.set_num_to_snippet(100000);
+ snippet_spec.set_num_matches_per_property(100000);
+ snippet_spec.set_max_window_utf32_length(64);
+
+ SectionIdMask section_id_mask = 0x01;
+ SnippetProto snippet_proto;
+ for (auto _ : state) {
+ snippet_proto = snippet_retriever->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec, document,
+ section_id_mask);
+ ASSERT_THAT(snippet_proto.entries(), SizeIs(1));
+ ASSERT_THAT(snippet_proto.entries(0).snippet_matches(),
+ SizeIs(num_actual_matches));
+ }
+
+ // Destroy the schema store before the whole directory is removed because it
+ // persists data in its destructor.
+ schema_store.reset();
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+}
+BENCHMARK(BM_SnippetRfcOneProperty)
+ // Arguments: num_matches, total_terms
+ ->ArgPair(1, 1)
+ ->ArgPair(1, 16) // single match
+ ->ArgPair(2, 16) // ~10% matches
+ ->ArgPair(3, 16) // ~20% matches
+ ->ArgPair(8, 16) // 50% matches
+ ->ArgPair(16, 16) // 100% matches
+ ->ArgPair(1, 128) // single match
+ ->ArgPair(13, 128) // ~10% matches
+ ->ArgPair(26, 128) // ~20% matches
+ ->ArgPair(64, 128) // 50% matches
+ ->ArgPair(128, 128) // 100% matches
+ ->ArgPair(1, 512) // single match
+ ->ArgPair(51, 512) // ~10% matches
+ ->ArgPair(102, 512) // ~20% matches
+ ->ArgPair(256, 512) // 50% matches
+ ->ArgPair(512, 512) // 100% matches
+ ->ArgPair(1, 1024) // single match
+ ->ArgPair(102, 1024) // ~10% matches
+ ->ArgPair(205, 1024) // ~20% matches
+ ->ArgPair(512, 1024) // 50% matches
+ ->ArgPair(1024, 1024) // 100% matches
+ ->ArgPair(1, 4096) // single match
+ ->ArgPair(410, 4096) // ~10% matches
+ ->ArgPair(819, 4096) // ~20% matches
+ ->ArgPair(2048, 4096) // 50% matches
+ ->ArgPair(4096, 4096) // 100% matches
+ ->ArgPair(1, 16384) // single match
+ ->ArgPair(1638, 16384) // ~10% matches
+ ->ArgPair(3277, 16384) // ~20% matches
+ ->ArgPair(8192, 16384) // 50% matches
+ ->ArgPair(16384, 16384); // 100% matches
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index 3b3bf61..8d81b43 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -22,70 +22,100 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/mock-filesystem.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section-manager.h"
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/snippet-helpers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/map/map-normalizer.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/snippet-helpers.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
+using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::SizeIs;
+// TODO (b/246964044): remove ifdef guard when url-tokenizer is ready for export
+// to Android. Also move it to schema-builder.h
+#ifdef ENABLE_URL_TOKENIZER
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_URL =
+ StringIndexingConfig::TokenizerType::URL;
+#endif // ENABLE_URL_TOKENIZER
+
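+// Returns the property path of every entry in the snippet, in order, so tests
+// can assert on the full set with ElementsAre().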
+std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) {
+ std::vector<std::string_view> paths;
+ for (const SnippetProto::EntryProto& entry : snippet.entries()) {
+ paths.push_back(entry.property_name());
+ }
+ return paths;
+}
+
class SnippetRetrieverTest : public testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/icing";
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
- ICING_ASSERT_OK_AND_ASSIGN(language_segmenter_,
- language_segmenter_factory::Create());
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ jni_cache_ = GetTestJniCache();
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
// Setup the schema
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
- SchemaProto schema;
- SchemaTypeConfigProto* type_config = schema.add_types();
- type_config->set_schema_type("email");
- PropertyConfigProto* prop_config = type_config->add_properties();
- prop_config->set_property_name("subject");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- prop_config = type_config->add_properties();
- prop_config->set_property_name("body");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
@@ -99,7 +129,7 @@ class SnippetRetrieverTest : public testing::Test {
snippet_spec_.set_num_to_snippet(std::numeric_limits<int32_t>::max());
snippet_spec_.set_num_matches_per_property(
std::numeric_limits<int32_t>::max());
- snippet_spec_.set_max_window_bytes(64);
+ snippet_spec_.set_max_window_utf32_length(64);
}
void TearDown() override {
@@ -107,10 +137,12 @@ class SnippetRetrieverTest : public testing::Test {
}
Filesystem filesystem_;
+ FakeClock fake_clock_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<SnippetRetriever> snippet_retriever_;
std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<const JniCache> jni_cache_;
ResultSpecProto::SnippetSpecProto snippet_spec_;
std::string test_dir_;
};
@@ -144,13 +176,67 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeSmallerThanMatch) {
// Window starts at the beginning of "three" and ends in the middle of
// "three". len=4, orig_window= "thre"
- snippet_spec_.set_max_window_bytes(4);
+ snippet_spec_.set_max_window_utf32_length(4);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowSizeEqualToMatch_OddLengthMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts at the beginning of "three" and at the exact end of
+ // "three". len=5, orig_window= "three"
+ snippet_spec_.set_max_window_utf32_length(5);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq(""));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("three"));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowSizeEqualToMatch_EvenLengthMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"four"}}};
+
+ // Window starts at the beginning of "four" and at the exact end of
+ // "four". len=4, orig_window= "four"
+ snippet_spec_.set_max_window_utf32_length(4);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("four"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
@@ -165,16 +251,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
- // Window starts at the space between "one" and "two". Window ends in the
- // middle of "four".
- // len=14, orig_window=" two three fou"
- snippet_spec_.set_max_window_bytes(14);
+ // String: "one two three four.... five"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 23 27
+ // UTF-32 idx: 0 4 8 14 23 27
+ //
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (2,17).
+ // 2. trimmed, no-shifting window [4,13) "two three"
+ // 3. trimmed, shifted window [4,18) "two three four"
+ snippet_spec_.set_max_window_utf32_length(14);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("two three"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three four"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) {
@@ -189,15 +284,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
- // Window starts in the middle of "one" and ends at the end of "four".
- // len=16, orig_window="e two three four"
- snippet_spec_.set_max_window_bytes(16);
+ // String: "one two three four.... five"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 23 27
+ // UTF-32 idx: 0 4 8 14 23 27
+ //
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (1,18).
+ // 2. trimmed, no-shifting window [4,18) "two three four"
+ // 3. trimmed, shifted window [4,20) "two three four.."
+ snippet_spec_.set_max_window_utf32_length(16);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("two three four"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three four.."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) {
@@ -214,17 +319,20 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) {
// Window ends in the middle of all the punctuation and window starts at 0.
// len=20, orig_window="one two three four.."
- snippet_spec_.set_max_window_bytes(20);
+ snippet_spec_.set_max_window_utf32_length(20);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.."));
}
TEST_F(SnippetRetrieverTest,
- SnippetingWindowMaxWindowEndsInMiddleOfMultiBytePunctuation) {
+ SnippetingWindowMaxWindowEndsMultiBytePunctuation) {
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "email/1")
@@ -238,18 +346,21 @@ TEST_F(SnippetRetrieverTest,
SectionRestrictQueryTermsMap query_terms{{"", {"in"}}};
// Window ends in the middle of all the punctuation and window starts at 0.
- // len=26, orig_window="pside down in Australia\xC2"
- snippet_spec_.set_max_window_bytes(24);
+ // len=26, orig_window="pside down in Australia¿"
+ snippet_spec_.set_max_window_utf32_length(24);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("down in Australia"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("down in Australia¿"));
}
TEST_F(SnippetRetrieverTest,
- SnippetingWindowMaxWindowEndsInMultiBytePunctuation) {
+ SnippetingWindowMaxWindowBeyondMultiBytePunctuation) {
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "email/1")
@@ -263,14 +374,17 @@ TEST_F(SnippetRetrieverTest,
SectionRestrictQueryTermsMap query_terms{{"", {"in"}}};
// Window ends in the middle of all the punctuation and window starts at 0.
- // len=26, orig_window="upside down in Australia\xC2\xBF"
- snippet_spec_.set_max_window_bytes(26);
+ // len=26, orig_window="upside down in Australia¿ "
+ snippet_spec_.set_max_window_utf32_length(26);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("upside down in Australia¿"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("upside down in Australia¿"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) {
@@ -285,15 +399,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
- // Window starts before 0.
- // len=22, orig_window="one two three four..."
- snippet_spec_.set_max_window_bytes(22);
+ // String: "one two three four.... five"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 23 27
+ // UTF-32 idx: 0 4 8 14 23 27
+ //
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (-2,21).
+ // 2. trimmed, no-shifting window [0,21) "one two three four..."
+ // 3. trimmed, shifted window [0,22) "one two three four...."
+ snippet_spec_.set_max_window_utf32_length(22);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four..."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) {
@@ -310,13 +434,16 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) {
// Window ends before "five" but after all the punctuation
// len=26, orig_window="one two three four.... "
- snippet_spec_.set_max_window_bytes(26);
+ snippet_spec_.set_max_window_utf32_length(26);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four...."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) {
@@ -331,15 +458,25 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
- // Window ends in the middle of "five"
- // len=32, orig_window="one two three four.... fiv"
- snippet_spec_.set_max_window_bytes(32);
+ // String: "one two three four.... five"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 23 27
+ // UTF-32 idx: 0 4 8 14 23 27
+ //
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (-7,26).
+ // 2. trimmed, no-shifting window [0,26) "one two three four...."
+ // 3. trimmed, shifted window [0,27) "one two three four.... five"
+ snippet_spec_.set_max_window_utf32_length(32);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four...."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) {
@@ -356,13 +493,16 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) {
// Max window size equals the size of the value.
// len=34, orig_window="one two three four.... five"
- snippet_spec_.set_max_window_bytes(34);
+ snippet_spec_.set_max_window_utf32_length(34);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.... five"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) {
@@ -379,13 +519,152 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) {
// Max window size exceeds the size of the value.
// len=36, orig_window="one two three four.... five"
- snippet_spec_.set_max_window_bytes(36);
+ snippet_spec_.set_max_window_utf32_length(36);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStart) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five six")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"two"}}};
+
+ // String: "one two three four.... five six"
+ // ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 23 28 31
+ // UTF-32 idx: 0 4 8 14 23 28 31
+ //
+ // The window will extend past the start of the value.
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (-10,19).
+ // 2. trimmed, no-shifting window [0,19) "one two three four."
+ // 3. trimmed, shifted window [0,27) "one two three four.... five"
+ snippet_spec_.set_max_window_utf32_length(28);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
+}
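+
+// A minimal sketch, assuming whitespace-only token boundaries, of the
+// three-step window selection that the comments in these tests trace:
+// (1) center an untrimmed window of max_len around the match, (2) clamp it to
+// the value and trim any token split at either edge, (3) grow the trimmed
+// window over neighboring whole tokens while budget remains. The production
+// retriever works on UTF-32 offsets and gets token boundaries from the
+// language segmenter (where punctuation forms its own tokens), so this sketch
+// will not reproduce the punctuation traces in these tests. SketchWindow is
+// illustrative only and is not part of the library (uses <algorithm>,
+// <string_view>, <utility>).
+[[maybe_unused]] std::pair<int, int> SketchWindow(std::string_view value,
+                                                  int match_start,
+                                                  int match_end, int max_len) {
+  const int len = static_cast<int>(value.length());
+  // 1. Untrimmed, no-shifting window centered on the match; the bounds may
+  //    fall outside [0, len).
+  int start = (match_start + match_end) / 2 - max_len / 2;
+  int end = start + max_len;
+  start = std::max(start, 0);
+  end = std::min(end, len);
+  // 2. Trim: drop any token split at an edge, then strip separators so the
+  //    window begins and ends on token characters.
+  while (start > 0 && start < end && value[start - 1] != ' ') ++start;
+  while (start < end && value[start] == ' ') ++start;
+  while (end > start && end < len && value[end] != ' ' &&
+         value[end - 1] != ' ') {
+    --end;
+  }
+  while (end > start && value[end - 1] == ' ') --end;
+  // 3. Shift: extend over whole neighboring tokens while they still fit,
+  //    first toward the end of the value, then toward the start.
+  auto next_token_end = [&](int pos) {
+    while (pos < len && value[pos] == ' ') ++pos;
+    while (pos < len && value[pos] != ' ') ++pos;
+    return pos;
+  };
+  auto prev_token_start = [&](int pos) {
+    while (pos > 0 && value[pos - 1] == ' ') --pos;
+    while (pos > 0 && value[pos - 1] != ' ') --pos;
+    return pos;
+  };
+  for (int e = next_token_end(end); e > end && e - start <= max_len;
+       e = next_token_end(end)) {
+    end = e;
+  }
+  for (int s = prev_token_start(start); s < start && end - s <= max_len;
+       s = prev_token_start(start)) {
+    start = s;
+  }
+  return {start, end};
+}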
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEnd) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five six")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"five"}}};
+
+ // String: "one two three four.... five six"
+ // ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 23 28 31
+ // UTF-32 idx: 0 4 8 14 23 28 31
+ //
+ // The window will extend past the end of the value.
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (10,39).
+ // 2. trimmed, no-shifting window [14,31) "four.... five six"
+ // 3. trimmed, shifted window [4,31) "two three four.... five six"
+ snippet_spec_.set_max_window_utf32_length(28);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three four.... five six"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStartShortText) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four....")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"two"}}};
+
+ // String: "one two three four...."
+ // ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 22
+ // UTF-32 idx: 0 4 8 14 22
+ //
+ // The window will extend past the start of the value.
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (-10,19).
+ // 2. trimmed, no-shifting window [0, 19) "one two three four."
+ // 3. trimmed, shifted window [0, 22) "one two three four...."
+ snippet_spec_.set_max_window_utf32_length(28);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEndShortText) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four....")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"four"}}};
+
+ // String: "one two three four...."
+ // ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 4 8 14 22
+ // UTF-32 idx: 0 4 8 14 22
+ //
+ // The window will extend past the end of the value.
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (1,30).
+ // 2. trimmed, no-shifting window [4, 22) "two three four...."
+ // 3. trimmed, shifted window [0, 22) "one two three four...."
+ snippet_spec_.set_max_window_utf32_length(28);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.... five"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, PrefixSnippeting) {
@@ -399,14 +678,18 @@ TEST_F(SnippetRetrieverTest, PrefixSnippeting) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets. 'f' should match prefix-enabled property 'subject', but
// not exact-only property 'body'
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("f"));
}
TEST_F(SnippetRetrieverTest, ExactSnippeting) {
@@ -421,8 +704,7 @@ TEST_F(SnippetRetrieverTest, ExactSnippeting) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), IsEmpty());
@@ -437,18 +719,21 @@ TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) {
.AddStringProperty("body", "Only a fool would match this content.")
.Build();
- snippet_spec_.set_max_window_bytes(0);
+ snippet_spec_.set_max_window_utf32_length(0);
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) {
@@ -461,23 +746,49 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) {
"Concerning the subject of foo, we need to begin "
"considering our options regarding body bar.")
.Build();
+ // String: "Concerning the subject of foo, we need to begin considering "
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48
+ // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48
+ //
+ // String ctd: "our options regarding body bar."
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 60 64 72 82 87 91
+ // UTF-32 idx: 60 64 72 82 87 91
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ // The first window will be:
+ // 1. untrimmed, no-shifting window will be (-6,59).
+ // 2. trimmed, no-shifting window [0, 59) "Concerning... considering".
+ // 3. trimmed, shifted window [0, 63) "Concerning... our"
+ // The second window will be:
+ // 1. untrimmed, no-shifting window will be (54,91).
+ // 2. trimmed, no-shifting window [60, 91) "our... bar.".
+ // 3. trimmed, shifted window [31, 91) "we... bar."
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1),
- Eq("our options regarding body bar."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin considering our",
+ "we need to begin considering our options regarding body bar."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) {
@@ -490,23 +801,45 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) {
"Concerning the subject of foo, we need to begin "
"considering our options regarding body bar.")
.Build();
+ // String: "Concerning the subject of foo, we need to begin considering "
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48
+ // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48
+ //
+ // String ctd: "our options regarding body bar."
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 60 64 72 82 87 91
+ // UTF-32 idx: 60 64 72 82 87 91
+ //
// Section 1 "subject" is not in the section_mask, so no snippet information
// from that section should be returned by the SnippetRetriever.
SectionIdMask section_mask = 0b00000001;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ // The first window will be:
+ // 1. untrimmed, no-shifting window will be (-6,59).
+ // 2. trimmed, no-shifting window [0, 59) "Concerning... considering".
+ // 3. trimmed, shifted window [0, 63) "Concerning... our"
+ // The second window will be:
+ // 1. untrimmed, no-shifting window will be (54,91).
+ // 2. trimmed, no-shifting window [60, 91) "our... bar.".
+ // 3. trimmed, shifted window [31, 91) "we... bar."
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1),
- Eq("our options regarding body bar."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin considering our",
+ "we need to begin considering our options regarding body bar."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
@@ -519,6 +852,15 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
"Concerning the subject of foo, we need to begin "
"considering our options regarding body bar.")
.Build();
+ // String: "Concerning the subject of foo, we need to begin considering "
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48
+ // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48
+ //
+ // String ctd: "our options regarding body bar."
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 60 64 72 82 87 91
+ // UTF-32 idx: 60 64 72 82 87 91
SectionIdMask section_mask = 0b00000011;
// "subject" should match in both sections, but "foo" is restricted to "body"
// so it should only match in the 'body' section and not the 'subject'
@@ -526,25 +868,38 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
SectionRestrictQueryTermsMap query_terms{{"", {"subject"}},
{"body", {"foo"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- // 'subject' section should only have the one match for "subject".
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("subject"));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 1), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "subject", 1), IsEmpty());
-
- // 'body' section should have matches for "subject" and "foo".
- EXPECT_THAT(GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("subject"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ // The first window will be:
+ // 1. untrimmed, no-shifting window will be (-15,50).
+ // 2. trimmed, no-shifting window [0, 47) "Concerning... begin".
+ // 3. trimmed, shifted window [0, 63) "Concerning... our"
+ // The second window will be:
+ // 1. untrimmed, no-shifting window will be (-6,59).
+ // 2. trimmed, no-shifting window [0, 59) "Concerning... considering".
+ // 3. trimmed, shifted window [0, 63) "Concerning... our"
EXPECT_THAT(
- GetWindow(document, snippet, "body", 1),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("foo"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin considering our",
+ "Concerning the subject of foo, we need to begin considering our"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("subject", "foo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("subject", "foo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("subject"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)),
+ ElementsAre("subject"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) {
@@ -558,24 +913,44 @@ TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) {
"considering our options regarding body bar.")
.Build();
+ // String: "Concerning the subject of foo, we need to begin considering "
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 11 15 23 26 31 34 39 42 48
+ // UTF-32 idx: 0 11 15 23 26 31 34 39 42 48
+ //
+ // String ctd: "our options regarding body bar."
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 60 64 72 82 87 91
+ // UTF-32 idx: 60 64 72 82 87 91
snippet_spec_.set_num_matches_per_property(1);
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ // The window will be:
+ // 1. untrimmed, no-shifting window will be (-6,59).
+ // 2. trimmed, no-shifting window [0, 59) "Concerning... considering".
+ // 3. trimmed, shifted window [0, 63) "Concerning... our"
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), IsEmpty());
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin considering our"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) {
@@ -589,12 +964,15 @@ TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"md"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("MDI team"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("MDI"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("MDI team"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("MDI"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("MD"));
}
TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) {
@@ -609,14 +987,1025 @@ TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) {
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"zurich"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", 0),
- Eq("Some members are in Zürich."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("Zürich"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("Some members are in Zürich."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("Zürich"));
+
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("Zürich"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto document;
+ document.set_schema("SingleLevelType");
+ PropertyProto* prop = document.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = document.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = document.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ SectionIdMask section_mask = 0b00000111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
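+ // "polo" sits at indices 1 and 3 of each repeated value, so X, Y, and Z
+ // each contribute two entries, addressed as "<property>[<value index>]".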
+ EXPECT_THAT(snippet.entries(), SizeIs(6));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("X[1]"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(GetPropertyPaths(snippet),
+ ElementsAre("X[1]", "X[3]", "Y[1]", "Y[3]", "Z[1]", "Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto subdocument;
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+
+ SectionIdMask section_mask = 0b111111111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(18));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A.X[1]"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A.X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(
+ GetPropertyPaths(snippet),
+ ElementsAre("A.X[1]", "A.X[3]", "A.Y[1]", "A.Y[3]", "A.Z[1]", "A.Z[3]",
+ "B.X[1]", "B.X[3]", "B.Y[1]", "B.Y[3]", "B.Z[1]", "B.Z[3]",
+ "C.X[1]", "C.X[3]", "C.Y[1]", "C.Y[3]", "C.Z[1]", "C.Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto subdocument;
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
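+ // Enable all nine indexed sections (one per nested string property).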
+ SectionIdMask section_mask = 0b111111111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(36));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X[1]"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[0].X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(GetPropertyPaths(snippet),
+ ElementsAre("A[0].X[1]", "A[0].X[3]", "A[1].X[1]", "A[1].X[3]",
+ "A[0].Y[1]", "A[0].Y[3]", "A[1].Y[1]", "A[1].Y[3]",
+ "A[0].Z[1]", "A[0].Z[3]", "A[1].Z[1]", "A[1].Z[3]",
+ "B[0].X[1]", "B[0].X[3]", "B[1].X[1]", "B[1].X[3]",
+ "B[0].Y[1]", "B[0].Y[3]", "B[1].Y[1]", "B[1].Y[3]",
+ "B[0].Z[1]", "B[0].Z[3]", "B[1].Z[1]", "B[1].Z[3]",
+ "C[0].X[1]", "C[0].X[3]", "C[1].X[1]", "C[1].X[3]",
+ "C[0].Y[1]", "C[0].Y[3]", "C[1].Y[1]", "C[1].Y[3]",
+ "C[0].Z[1]", "C[0].Z[3]", "C[1].Z[1]", "C[1].Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ DocumentProto subdocument;
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ prop->add_string_values("polo");
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ prop->add_string_values("marco");
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ prop->add_string_values("polo");
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ SectionIdMask section_mask = 0b111111111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(12));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[1].X"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(
+ GetPropertyPaths(snippet),
+ ElementsAre("A[0].X", "A[1].X", "A[0].Z", "A[1].Z", "B[0].X", "B[1].X",
+ "B[0].Z", "B[1].Z", "C[0].X", "C[1].X", "C[0].Z", "C[1].Z"));
+}
+
+TEST_F(SnippetRetrieverTest, CJKSnippetMatchTest) {
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kChinese)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the match is correct.
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("走路"));
+ EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("走"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+ EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(1));
+}
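+
+// Illustrative sketch (not library code): the UTF-16 values asserted above
+// can be reproduced with ICU, assuming icu::UnicodeString is available:
+//
+//   icu::UnicodeString ustr = icu::UnicodeString::fromUTF8(kChinese);
+//   int32_t num_code_units = ustr.length();  // 9: one code unit per character
+//   // "走路" spans UTF-8 bytes [9, 15) but UTF-16 code units [3, 5), which
+//   // matches exact_match_utf16_position() == 3 and a length of 2.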
+
+TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // UTF32 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kChinese)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};
+
+ // The window will be:
+ // 1. the untrimmed, unshifted window (0, 7).
+ // 2. the trimmed, unshifted window [1, 6) "每天走路去".
+ // 3. the trimmed, shifted window [0, 6) "我每天走路去".
+ snippet_spec_.set_max_window_utf32_length(6);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the match is correct.
+ EXPECT_THAT(GetWindows(content, *entry), ElementsAre("我每天走路去"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.window_utf16_position(), Eq(0));
+ EXPECT_THAT(match_proto.window_utf16_length(), Eq(6));
+}
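+
+// Illustrative sketch (not library code) of the three-step windowing above,
+// using hypothetical helper names for each step:
+//
+//   Window w = UntrimmedWindowAround(match);  // step 1: (0, 7)
+//   w = TrimToSegmentBoundaries(w);           // step 2: [1, 6) "每天走路去"
+//   w = ShiftToUseBudget(w);                  // step 3: [0, 6) "我每天走路去"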
+
+TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitSnippetMatchTest) {
+ // The following string has four-byte UTF-8 characters. Most importantly,
+ // each character is also two code units in UTF-16.
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^ ^
+ // UTF8 idx: 0 9 18
+ // UTF16 idx: 0 5 10
+ // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄"
+ constexpr std::string_view kText = "𐀀𐀁 𐀂𐀃 𐀄";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kText)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"𐀂"}}};
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the match is correct.
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("𐀂𐀃"));
+ EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("𐀂"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(5));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(4));
+ EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2));
+}
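+
+// Illustrative note: every character above is a supplementary code point, so
+// UTF-16 encodes it as a surrogate pair. For example, 𐀀 (U+10000) becomes
+// the two code units 0xD800 0xDC00, which is why the two-character match
+// "𐀂𐀃" has exact_match_utf16_length() == 4.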
+
+TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) {
+ // The following string has four-byte UTF-8 characters. Most importantly,
+ // each character is also two code units in UTF-16.
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^ ^
+ // UTF8 idx: 0 9 18
+ // UTF16 idx: 0 5 10
+ // UTF32 idx: 0 3 6
+ // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄"
+ constexpr std::string_view kText = "𐀀𐀁 𐀂𐀃 𐀄";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kText)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"𐀂"}}};
+
+ // Set a six-character window. This will produce a window like this:
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^
+ // UTF8 idx: 9 22
+ // UTF16 idx: 5 12
+ // UTF32 idx: 3 7
+ snippet_spec_.set_max_window_utf32_length(6);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the match is correct.
+ EXPECT_THAT(GetWindows(content, *entry), ElementsAre("𐀂𐀃 𐀄"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.window_utf16_position(), Eq(5));
+ EXPECT_THAT(match_proto.window_utf16_length(), Eq(7));
+}
+
+TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("verbatimType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("verbatim")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "verbatim/1")
+ .SetSchema("verbatimType")
+ .AddStringProperty("verbatim", "Hello, world!")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000001;
+ SectionRestrictQueryTermsMap query_terms{{"", {"Hello, world!"}}};
+
+ snippet_spec_.set_max_window_utf32_length(13);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ // There should only be one snippet entry and match, the verbatim token in its
+ // entirety.
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ ASSERT_THAT(entry->property_name(), "verbatim");
+
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ // We expect the match to begin at position 0, and to span the entire token
+ // which contains 13 characters.
+ EXPECT_THAT(match_proto.window_byte_position(), Eq(0));
+ EXPECT_THAT(match_proto.window_utf16_length(), Eq(13));
+
+ // We expect the submatch to begin at position 0 of the verbatim token and
+ // span the length of our query term "Hello, world!", which has utf-16 length
+ // of 13. The submatch length equals the window length because the snippet
+ // is retrieved with an exact term match.
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0));
+ EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(13));
+}
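+
+// Illustrative note: TOKENIZER_VERBATIM emits the entire property value as a
+// single token, so the only exact match for this property is the full string
+// "Hello, world!"; a plain-tokenized query term like "Hello" would not match.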
+
+TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("verbatimType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("verbatim")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // UTF32 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ std::string chinese_string = "我每天走路去上班。";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "verbatim/1")
+ .SetSchema("verbatimType")
+ .AddStringProperty("verbatim", chinese_string)
+ .Build();
+
+ SectionIdMask section_mask = 0b00000001;
+ SectionRestrictQueryTermsMap query_terms{{"", {"我每"}}};
+
+ snippet_spec_.set_max_window_utf32_length(9);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // There should only be one snippet entry and match, the verbatim token in its
+ // entirety.
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ ASSERT_THAT(entry->property_name(), "verbatim");
+
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ // We expect the match to begin at position 0, and to span the entire token
+ // which has utf-16 length of 9.
+ EXPECT_THAT(match_proto.window_byte_position(), Eq(0));
+ EXPECT_THAT(match_proto.window_utf16_length(), Eq(9));
+
+ // We expect the submatch to begin at position 0 of the verbatim token and
+ // span the length of our query term "我每", which has utf-16 length of 2.
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0));
+ EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2));
+}
+
+TEST_F(SnippetRetrieverTest, SnippettingRfc822Ascii) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("rfc822Type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("rfc822")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "rfc822/1")
+ .SetSchema("rfc822Type")
+ .AddStringProperty("rfc822",
+ "Alexander Sav <tom.bar@google.com>, Very Long "
+ "Name Example <tjbarron@google.com>")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000001;
+
+ // This should match both the first-name token and the entire RFC822 token.
+ SectionRestrictQueryTermsMap query_terms{{"", {"alexand"}}};
+
+ snippet_spec_.set_max_window_utf32_length(35);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "rfc822");
+
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("Alexander Sav <tom.bar@google.com>,",
+ "Alexander Sav <tom.bar@google.com>,"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("Alexander Sav <tom.bar@google.com>", "Alexander"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("Alexand", "Alexand"));
+
+ // "tom" should match the local component, local address, and address tokens.
+ query_terms = SectionRestrictQueryTermsMap{{"", {"tom"}}};
+ snippet_spec_.set_max_window_utf32_length(36);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "rfc822");
+
+ content = GetString(&document, snippet.entries(0).property_name());
+
+ // TODO(b/248362902) Stop returning duplicate matches.
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("Alexander Sav <tom.bar@google.com>,",
+ "Alexander Sav <tom.bar@google.com>,",
+ "Alexander Sav <tom.bar@google.com>,"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("tom.bar", "tom.bar@google.com", "tom"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("tom", "tom", "tom"));
+}
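+
+// Illustrative note (derived from the expectations above): the RFC822
+// tokenizer emits several overlapping tokens per address -- a name token
+// ("Alexander"), the local component ("tom"), the local address ("tom.bar"),
+// the address ("tom.bar@google.com"), and the full RFC822 token -- so a
+// single query term can produce multiple matches over the same text.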
+
+TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("rfc822Type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("rfc822")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::string chinese_string = "我, 每天@走路, 去@上班";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "rfc822/1")
+ .SetSchema("rfc822Type")
+ .AddStringProperty("rfc822", chinese_string)
+ .Build();
+
+ SectionIdMask section_mask = 0b00000001;
+
+ SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};
+
+ snippet_spec_.set_max_window_utf32_length(8);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // There should only be one snippet entry and match: the local component
+ // token.
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "rfc822");
+
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // The local component, address, local address, and token will all match. The
+ // windows for address and token are "" as the snippet window is too small.
+ // TODO(b/248362902) Stop returning duplicate matches.
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("每天@走路,", "每天@走路,"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("走路", "走路"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("走", "走"));
+}
+
+#ifdef ENABLE_URL_TOKENIZER
+TEST_F(SnippetRetrieverTest, SnippettingUrlAscii) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("urlType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_URL)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "url/1")
+ .SetSchema("urlType")
+ .AddStringProperty("url", "https://mail.google.com/calendar/google/")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000001;
+
+ // Query with single url split-token match
+ SectionRestrictQueryTermsMap query_terms{{"", {"com"}}};
+ // 40 is the length of the url.
+ // Window that is the size of the url should return entire url.
+ snippet_spec_.set_max_window_utf32_length(40);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "url");
+
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("https://mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("com"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("com"));
+
+ // Query with single url suffix-token match
+ query_terms = SectionRestrictQueryTermsMap{{"", {"mail.goo"}}};
+ snippet_spec_.set_max_window_utf32_length(40);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "url");
+
+ content = GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("https://mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("mail.goo"));
+
+ // Query with multiple url split-token matches
+ query_terms = SectionRestrictQueryTermsMap{{"", {"goog"}}};
+ snippet_spec_.set_max_window_utf32_length(40);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "url");
+
+ content = GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("https://mail.google.com/calendar/google/",
+ "https://mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("google", "google"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("goog", "goog"));
+
+ // Query with both url split-token and suffix-token matches
+ query_terms = SectionRestrictQueryTermsMap{{"", {"mail"}}};
+ snippet_spec_.set_max_window_utf32_length(40);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "url");
+
+ content = GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("https://mail.google.com/calendar/google/",
+ "https://mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("mail", "mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("mail", "mail"));
+
+ // Prefix query with both url split-token and suffix-token matches
+ query_terms = SectionRestrictQueryTermsMap{{"", {"http"}}};
+ snippet_spec_.set_max_window_utf32_length(40);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "url");
+
+ content = GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("https://mail.google.com/calendar/google/",
+ "https://mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("https", "https://mail.google.com/calendar/google/"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("http", "http"));
+
+ // Window that's smaller than the input size should not return any matches.
+ query_terms = SectionRestrictQueryTermsMap{{"", {"google"}}};
+ snippet_spec_.set_max_window_utf32_length(10);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(0));
+
+ // Test case with more than two matches
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "url/1")
+ .SetSchema("urlType")
+ .AddStringProperty("url", "https://www.google.com/calendar/google/")
+ .Build();
+
+ // Prefix query with both url split-token and suffix-token matches
+ query_terms = SectionRestrictQueryTermsMap{{"", {"google"}}};
+ snippet_spec_.set_max_window_utf32_length(39);
+
+ snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), "url");
+
+ content = GetString(&document, snippet.entries(0).property_name());
+
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("https://www.google.com/calendar/google/",
+ "https://www.google.com/calendar/google/",
+ "https://www.google.com/calendar/google/"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("google", "google", "google.com/calendar/google/"));
+ EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
+ ElementsAre("google", "google", "google"));
}
+#endif // ENABLE_URL_TOKENIZER
} // namespace
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
new file mode 100644
index 0000000..c74505e
--- /dev/null
+++ b/icing/schema-builder.h
@@ -0,0 +1,227 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_BUILDER_H_
+#define ICING_SCHEMA_BUILDER_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+
+namespace icing {
+namespace lib {
+
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_UNKNOWN =
+ PropertyConfigProto::Cardinality::UNKNOWN;
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED =
+ PropertyConfigProto::Cardinality::REPEATED;
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto::Cardinality::OPTIONAL;
+constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED =
+ PropertyConfigProto::Cardinality::REQUIRED;
+
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_NONE =
+ StringIndexingConfig::TokenizerType::NONE;
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN =
+ StringIndexingConfig::TokenizerType::PLAIN;
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM =
+ StringIndexingConfig::TokenizerType::VERBATIM;
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_RFC822 =
+ StringIndexingConfig::TokenizerType::RFC822;
+constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_URL =
+ StringIndexingConfig::TokenizerType::URL;
+
+constexpr TermMatchType::Code TERM_MATCH_UNKNOWN = TermMatchType::UNKNOWN;
+constexpr TermMatchType::Code TERM_MATCH_EXACT = TermMatchType::EXACT_ONLY;
+constexpr TermMatchType::Code TERM_MATCH_PREFIX = TermMatchType::PREFIX;
+
+constexpr IntegerIndexingConfig::NumericMatchType::Code NUMERIC_MATCH_UNKNOWN =
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN;
+constexpr IntegerIndexingConfig::NumericMatchType::Code NUMERIC_MATCH_RANGE =
+ IntegerIndexingConfig::NumericMatchType::RANGE;
+
+constexpr PropertyConfigProto::DataType::Code TYPE_UNKNOWN =
+ PropertyConfigProto::DataType::UNKNOWN;
+constexpr PropertyConfigProto::DataType::Code TYPE_STRING =
+ PropertyConfigProto::DataType::STRING;
+constexpr PropertyConfigProto::DataType::Code TYPE_INT64 =
+ PropertyConfigProto::DataType::INT64;
+constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE =
+ PropertyConfigProto::DataType::DOUBLE;
+constexpr PropertyConfigProto::DataType::Code TYPE_BOOLEAN =
+ PropertyConfigProto::DataType::BOOLEAN;
+constexpr PropertyConfigProto::DataType::Code TYPE_BYTES =
+ PropertyConfigProto::DataType::BYTES;
+constexpr PropertyConfigProto::DataType::Code TYPE_DOCUMENT =
+ PropertyConfigProto::DataType::DOCUMENT;
+
+constexpr JoinableConfig::ValueType::Code JOINABLE_VALUE_TYPE_NONE =
+ JoinableConfig::ValueType::NONE;
+constexpr JoinableConfig::ValueType::Code JOINABLE_VALUE_TYPE_QUALIFIED_ID =
+ JoinableConfig::ValueType::QUALIFIED_ID;
+
+class PropertyConfigBuilder {
+ public:
+ PropertyConfigBuilder() = default;
+ explicit PropertyConfigBuilder(PropertyConfigProto property)
+ : property_(std::move(property)) {}
+
+ PropertyConfigBuilder& SetName(std::string_view name) {
+ property_.set_property_name(std::string(name));
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataType(
+ PropertyConfigProto::DataType::Code data_type) {
+ property_.set_data_type(data_type);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeString(
+ TermMatchType::Code match_type,
+ StringIndexingConfig::TokenizerType::Code tokenizer) {
+ property_.set_data_type(PropertyConfigProto::DataType::STRING);
+ property_.mutable_string_indexing_config()->set_term_match_type(match_type);
+ property_.mutable_string_indexing_config()->set_tokenizer_type(tokenizer);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeJoinableString(
+ JoinableConfig::ValueType::Code join_value_type,
+ TermMatchType::Code match_type = TERM_MATCH_UNKNOWN,
+ StringIndexingConfig::TokenizerType::Code tokenizer = TOKENIZER_NONE) {
+ property_.set_data_type(PropertyConfigProto::DataType::STRING);
+ property_.mutable_joinable_config()->set_value_type(join_value_type);
+ property_.mutable_string_indexing_config()->set_term_match_type(match_type);
+ property_.mutable_string_indexing_config()->set_tokenizer_type(tokenizer);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeInt64(
+ IntegerIndexingConfig::NumericMatchType::Code numeric_match_type) {
+ property_.set_data_type(PropertyConfigProto::DataType::INT64);
+ property_.mutable_integer_indexing_config()->set_numeric_match_type(
+ numeric_match_type);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeDocument(std::string_view schema_type,
+ bool index_nested_properties) {
+ property_.set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property_.set_schema_type(std::string(schema_type));
+ property_.mutable_document_indexing_config()->set_index_nested_properties(
+ index_nested_properties);
+ property_.mutable_document_indexing_config()
+ ->clear_indexable_nested_properties_list();
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeDocument(
+ std::string_view schema_type,
+ std::initializer_list<std::string> indexable_nested_properties_list) {
+ property_.set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property_.set_schema_type(std::string(schema_type));
+ property_.mutable_document_indexing_config()->set_index_nested_properties(
+ false);
+ for (const std::string& property : indexable_nested_properties_list) {
+ property_.mutable_document_indexing_config()
+ ->add_indexable_nested_properties_list(property);
+ }
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetJoinable(
+ JoinableConfig::ValueType::Code join_value_type, bool propagate_delete) {
+ property_.mutable_joinable_config()->set_value_type(join_value_type);
+ property_.mutable_joinable_config()->set_propagate_delete(propagate_delete);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetCardinality(
+ PropertyConfigProto::Cardinality::Code cardinality) {
+ property_.set_cardinality(cardinality);
+ return *this;
+ }
+
+ PropertyConfigProto Build() const { return std::move(property_); }
+
+ private:
+ PropertyConfigProto property_;
+};
+
+class SchemaTypeConfigBuilder {
+ public:
+ SchemaTypeConfigBuilder() = default;
+ SchemaTypeConfigBuilder(SchemaTypeConfigProto type_config)
+ : type_config_(std::move(type_config)) {}
+
+ SchemaTypeConfigBuilder& SetType(std::string_view type) {
+ type_config_.set_schema_type(std::string(type));
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& AddParentType(std::string_view parent_type) {
+ type_config_.add_parent_types(std::string(parent_type));
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& SetVersion(int version) {
+ type_config_.set_version(version);
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& AddProperty(PropertyConfigProto property) {
+ *type_config_.add_properties() = std::move(property);
+ return *this;
+ }
+ SchemaTypeConfigBuilder& AddProperty(PropertyConfigBuilder property_builder) {
+ *type_config_.add_properties() = property_builder.Build();
+ return *this;
+ }
+
+ SchemaTypeConfigProto Build() { return std::move(type_config_); }
+
+ private:
+ SchemaTypeConfigProto type_config_;
+};
+
+class SchemaBuilder {
+ public:
+ SchemaBuilder() = default;
+ SchemaBuilder(SchemaProto schema) : schema_(std::move(schema)) {}
+
+ SchemaBuilder& AddType(SchemaTypeConfigProto type) {
+ *schema_.add_types() = std::move(type);
+ return *this;
+ }
+ SchemaBuilder& AddType(SchemaTypeConfigBuilder type_builder) {
+ *schema_.add_types() = type_builder.Build();
+ return *this;
+ }
+
+ SchemaProto Build() { return std::move(schema_); }
+
+ private:
+ SchemaProto schema_;
+};
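+
+// Example usage (illustrative only):
+//
+//   SchemaProto schema =
+//       SchemaBuilder()
+//           .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty(
+//               PropertyConfigBuilder()
+//                   .SetName("subject")
+//                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+//                   .SetCardinality(CARDINALITY_OPTIONAL)))
+//           .Build();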
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_BUILDER_H_
diff --git a/icing/schema/backup-schema-producer.cc b/icing/schema/backup-schema-producer.cc
new file mode 100644
index 0000000..d0a0554
--- /dev/null
+++ b/icing/schema/backup-schema-producer.cc
@@ -0,0 +1,164 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/backup-schema-producer.h"
+
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/section.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Creates a map from each top-level property to the number of indexed
+// section ids it consumes, based on the list of indexed sections provided by
+// metadata_list.
+// For all non-document properties, the value will always be 1.
+// For document properties, the value will be the number of nested properties
+// that are indexed with that document type.
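+// For example (illustrative): for indexed section paths {"name",
+// "sender.name", "sender.address"}, the result is {"name": 1, "sender": 2}.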
+std::unordered_map<std::string_view, int> CreateIndexedIdCountMap(
+ const std::vector<SectionMetadata>* metadata_list) {
+ std::unordered_map<std::string_view, int> property_indexed_id_count_map;
+ for (const SectionMetadata& metadata : *metadata_list) {
+ std::string_view top_level_property;
+ size_t separator_pos =
+ metadata.path.find(property_util::kPropertyPathSeparator);
+ if (separator_pos == std::string::npos) {
+ top_level_property = metadata.path;
+ } else {
+ top_level_property =
+ std::string_view(metadata.path.c_str(), separator_pos);
+ }
+ int& count = property_indexed_id_count_map[top_level_property];
+ ++count;
+ }
+ return property_indexed_id_count_map;
+}
+
+// Returns the indices (within schema.types()) of all types that are rollback
+// incompatible (old code cannot handle these types if they are unmodified).
+//
+// Currently, this means types that:
+// 1. Use RFC822 tokenization for any properties
+// 2. Use more than 16 indexed properties
+libtextclassifier3::StatusOr<std::vector<int>>
+GetRollbackIncompatibleTypeIndices(const SchemaProto& schema,
+ const SectionManager& type_manager) {
+ std::vector<int> invalid_type_indices;
+ for (int i = 0; i < schema.types_size(); ++i) {
+ const SchemaTypeConfigProto& type = schema.types(i);
+ bool rollback_incompatible = false;
+ for (const PropertyConfigProto& property : type.properties()) {
+ if (property.string_indexing_config().tokenizer_type() ==
+ StringIndexingConfig::TokenizerType::RFC822) {
+ rollback_incompatible = true;
+ break;
+ }
+ }
+ if (rollback_incompatible) {
+ invalid_type_indices.push_back(i);
+ continue;
+ }
+
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ type_manager.GetMetadataList(type.schema_type()));
+ if (metadata_list->size() > kOldTotalNumSections) {
+ invalid_type_indices.push_back(i);
+ }
+ }
+ return invalid_type_indices;
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<BackupSchemaProducer>
+BackupSchemaProducer::Create(const SchemaProto& schema,
+ const SectionManager& type_manager) {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<int> invalid_type_indices,
+ GetRollbackIncompatibleTypeIndices(schema, type_manager));
+ if (invalid_type_indices.empty()) {
+ return BackupSchemaProducer();
+ }
+
+ SchemaProto backup_schema(schema);
+ std::unordered_map<std::string_view, int> type_indexed_property_count;
+ for (int i : invalid_type_indices) {
+ SchemaTypeConfigProto* type = backup_schema.mutable_types(i);
+
+ // This should never cause an error - every type should have an entry in the
+ // type_manager.
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ type_manager.GetMetadataList(type->schema_type()));
+ int num_indexed_sections = metadata_list->size();
+ std::unordered_map<std::string_view, int> property_indexed_id_count_map;
+ if (num_indexed_sections > kOldTotalNumSections) {
+ property_indexed_id_count_map = CreateIndexedIdCountMap(metadata_list);
+ }
+
+ // Step 1. Mark all properties that use the RFC822 tokenizer as unindexed.
+ for (PropertyConfigProto& property : *type->mutable_properties()) {
+ // If the property uses the RFC822 tokenizer, clear its string indexing
+ // config; this resets the tokenizer to NONE and the match type to UNKNOWN.
+ if (property.string_indexing_config().tokenizer_type() ==
+ StringIndexingConfig::TokenizerType::RFC822) {
+ property.clear_string_indexing_config();
+ --num_indexed_sections;
+ property_indexed_id_count_map.erase(property.property_name());
+ }
+ }
+
+ // Step 2. If there are any types that exceed the old indexed property
+ // limit, then mark indexed properties as unindexed until we're back under
+ // the limit.
+ if (num_indexed_sections <= kOldTotalNumSections) {
+ continue;
+ }
+
+ // We expect that the last properties are the ones added most recently and
+ // are the least crucial, so we remove them in reverse order. This is a bit
+ // arbitrary, but we don't really have sufficient information to make this
+ // judgment anyway.
+ for (auto itr = type->mutable_properties()->rbegin();
+ itr != type->mutable_properties()->rend(); ++itr) {
+ auto indexed_count_itr =
+ property_indexed_id_count_map.find(itr->property_name());
+ if (indexed_count_itr == property_indexed_id_count_map.end()) {
+ continue;
+ }
+
+ // Mark this property as unindexed and subtract all indexed property ids
+ // consumed by this property.
+ PropertyConfigProto& property = *itr;
+ property.clear_document_indexing_config();
+ property.clear_string_indexing_config();
+ property.clear_integer_indexing_config();
+ num_indexed_sections -= indexed_count_itr->second;
+ if (num_indexed_sections <= kOldTotalNumSections) {
+ break;
+ }
+ }
+ }
+ return BackupSchemaProducer(std::move(backup_schema));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/backup-schema-producer.h b/icing/schema/backup-schema-producer.h
new file mode 100644
index 0000000..61dcde6
--- /dev/null
+++ b/icing/schema/backup-schema-producer.h
@@ -0,0 +1,55 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
+#define ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+class BackupSchemaProducer {
+ public:
+ // Creates a BackupSchemaProducer based on `schema`.
+ // If schema doesn't require a backup schema (because it is fully
+ // rollback-proof) then no copies will be made and `is_backup_necessary` will
+ // return false.
+ // If schema *does* require a backup schema, then `is_backup_necessary` will
+ // return true and the backup schema can be retrieved by calling `Produce`.
+ // Returns:
+ // - On success, a BackupSchemaProducer
+ // - INTERNAL_ERROR if the schema is inconsistent with the type_manager.
+ static libtextclassifier3::StatusOr<BackupSchemaProducer> Create(
+ const SchemaProto& schema, const SectionManager& type_manager);
+
+ SchemaProto Produce() && { return std::move(cached_schema_); }
+
+ bool is_backup_necessary() const { return !cached_schema_.types().empty(); }
+
+ private:
+ BackupSchemaProducer() = default;
+ explicit BackupSchemaProducer(SchemaProto&& schema)
+ : cached_schema_(std::move(schema)) {}
+
+ SchemaProto cached_schema_;
+};
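+
+// Example usage (illustrative only):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       BackupSchemaProducer producer,
+//       BackupSchemaProducer::Create(schema, type_manager));
+//   if (producer.is_backup_necessary()) {
+//     SchemaProto backup = std::move(producer).Produce();
+//   }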
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
diff --git a/icing/schema/backup-schema-producer_test.cc b/icing/schema/backup-schema-producer_test.cc
new file mode 100644
index 0000000..dbd033f
--- /dev/null
+++ b/icing/schema/backup-schema-producer_test.cc
@@ -0,0 +1,737 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/backup-schema-producer.h"
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-type-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+class BackupSchemaProducerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+ }
+
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string schema_store_dir_;
+};
+
+TEST_F(BackupSchemaProducerTest, EmptySchema) {
+ SchemaProto empty;
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(empty, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(empty,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+TEST_F(BackupSchemaProducerTest, NoIndexedPropertySchema) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataType(TYPE_INT64)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeA", /*index_nested_properties=*/false))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop4")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataType(TYPE_STRING)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+TEST_F(BackupSchemaProducerTest, RollbackCompatibleSchema) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeA", /*index_nested_properties=*/true))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop4")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+TEST_F(BackupSchemaProducerTest, RemoveRfc822) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ SchemaProto expected_backup =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING)))
+ .Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraStringIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .AddProperty(indexed_string_property_builder.SetName("prop17"))
+ .AddProperty(indexed_string_property_builder.SetName("prop18"))
+ .AddProperty(indexed_string_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
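+ // With 20 indexed string properties and an old limit of 16 indexed
+ // sections, the four properties added last (prop16-prop19) are expected to
+ // become unindexed.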
+ PropertyConfigBuilder unindexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING);
+ SchemaTypeConfigProto expected_type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop16"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop17"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop18"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraIntIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_int_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_int_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_int_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_int_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_int_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .AddProperty(indexed_int_property_builder.SetName("prop10"))
+ .AddProperty(indexed_int_property_builder.SetName("prop11"))
+ .AddProperty(indexed_int_property_builder.SetName("prop12"))
+ .AddProperty(indexed_int_property_builder.SetName("prop13"))
+ .AddProperty(indexed_int_property_builder.SetName("prop14"))
+ .AddProperty(indexed_int_property_builder.SetName("prop15"))
+ .AddProperty(indexed_int_property_builder.SetName("prop16"))
+ .AddProperty(indexed_int_property_builder.SetName("prop17"))
+ .AddProperty(indexed_int_property_builder.SetName("prop18"))
+ .AddProperty(indexed_int_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
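+  // As with the string case above, "prop16" through "prop19" are expected to
+  // become unindexed so that the total # of sections stays <= 16.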
+ PropertyConfigBuilder unindexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_INT64);
+ SchemaTypeConfigProto expected_type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_int_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_int_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_int_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_int_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_int_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .AddProperty(indexed_int_property_builder.SetName("prop10"))
+ .AddProperty(indexed_int_property_builder.SetName("prop11"))
+ .AddProperty(indexed_int_property_builder.SetName("prop12"))
+ .AddProperty(indexed_int_property_builder.SetName("prop13"))
+ .AddProperty(indexed_int_property_builder.SetName("prop14"))
+ .AddProperty(indexed_int_property_builder.SetName("prop15"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop16"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop17"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop18"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraDocumentIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .Build();
+
+ PropertyConfigBuilder indexed_document_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true);
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_document_property_builder.SetName("propA"))
+ .AddProperty(indexed_document_property_builder.SetName("propB"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
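+  // "propA" and "propB" each contribute 10 nested sections (TypeB's "prop0"
+  // through "prop9"), exceeding the 16-section limit, so "propB"'s indexing
+  // config is expected to be dropped.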
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_document_property_builder.SetName("propA"))
+ .AddProperty(unindexed_document_property_builder.SetName("propB"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(
+ BackupSchemaProducerTest,
+ MakeExtraDocumentIndexedPropertiesWithIndexableNestedPropertiesListUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .Build();
+
+  // Create an indexed document property using an indexable nested properties
+  // list.
+ PropertyConfigBuilder indexed_document_property_with_list_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeB", /*indexable_nested_properties_list=*/{
+ "prop0", "prop1", "prop2", "prop3", "prop4", "prop5",
+ "unknown1", "unknown2", "unknown3"});
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propA"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propB"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+ ASSERT_THAT(schema_type_manager->section_manager().GetMetadataList("TypeA"),
+ IsOkAndHolds(Pointee(SizeIs(18))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+
+ // "propA" and "propB" both have 9 sections respectively, so we have to drop
+ // "propB" indexing config to make total # of sections <= 16.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propA"))
+ .AddProperty(unindexed_document_property_builder.SetName("propB"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeRfcPropertiesUnindexedFirst) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
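+  // There are 17 indexed properties, so at least one indexing config must be
+  // dropped. RFC822-tokenized properties are dropped first, so only "propRfc"
+  // is expected to become unindexed.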
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_typeA).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+TEST_F(BackupSchemaProducerTest, MakeExtraPropertiesUnindexedMultipleTypes) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .Build();
+
+ PropertyConfigBuilder indexed_document_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true);
+ PropertyConfigBuilder indexed_document_property_with_list_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeB", /*indexable_nested_properties_list=*/{
+ "prop0", "prop4", "unknown1", "unknown2", "unknown3"});
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("propA"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propB"))
+ .AddProperty(indexed_string_property_builder.SetName("propC"))
+ .AddProperty(indexed_document_property_builder.SetName("propD"))
+ .AddProperty(indexed_string_property_builder.SetName("propE"))
+ .AddProperty(indexed_int_property_builder.SetName("propF"))
+ .AddProperty(indexed_document_property_builder.SetName("propG"))
+ .AddProperty(indexed_string_property_builder.SetName("propH"))
+ .AddProperty(indexed_int_property_builder.SetName("propI"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propJ"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+ ASSERT_THAT(schema_type_manager->section_manager().GetMetadataList("TypeA"),
+ IsOkAndHolds(Pointee(SizeIs(26))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ PropertyConfigBuilder unindexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING);
+ PropertyConfigBuilder unindexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_INT64);
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+
+ // On version 0 (Android T):
+ // - Only "propA", "propC", "propD.prop0", "propD.prop1", "propD.prop2",
+ // "propD.prop3", "propD.prop4", "propE", "propF" will be assigned sections.
+  // - Unlike version 2, "propB.prop0", "propB.prop4", "propB.unknown1",
+  //   "propB.unknown2", "propB.unknown3" will be ignored because version 0
+  //   doesn't recognize the indexable nested properties list.
+  // - So there will be only 9 sections on version 0. We could potentially
+  //   avoid dropping the "propG", "propH", "propI" indexing configs on
+  //   version 0 (in which case there would be 16 sections), but it is ok to
+  //   keep the logic simple as long as the total # of sections is <= 16.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("propA"))
+ .AddProperty(
+ indexed_document_property_with_list_builder.SetName("propB"))
+ .AddProperty(indexed_string_property_builder.SetName("propC"))
+ .AddProperty(indexed_document_property_builder.SetName("propD"))
+ .AddProperty(indexed_string_property_builder.SetName("propE"))
+ .AddProperty(indexed_int_property_builder.SetName("propF"))
+ .AddProperty(unindexed_document_property_builder.SetName("propG"))
+ .AddProperty(unindexed_string_property_builder.SetName("propH"))
+ .AddProperty(unindexed_int_property_builder.SetName("propI"))
+ .AddProperty(unindexed_document_property_builder.SetName("propJ"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, portable_equals_proto::EqualsProto(expected_backup));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager-builder_test.cc b/icing/schema/joinable-property-manager-builder_test.cc
new file mode 100644
index 0000000..ac48faa
--- /dev/null
+++ b/icing/schema/joinable-property-manager-builder_test.cc
@@ -0,0 +1,446 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+
+class JoinablePropertyManagerBuilderTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(JoinablePropertyManagerBuilderTest, Build) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 1));
+
+ PropertyConfigProto prop_foo =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_bar =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_baz =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ // Add "foo" and "bar" to "SchemaTypeOne" (schema_type_id = 0).
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_bar, /*property_path=*/"bar"));
+ // Add "baz" to "SchemaTypeTwo" (schema_type_id = 1).
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/1, prop_baz, /*property_path=*/"baz"));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ // Check "SchemaTypeOne"
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList("SchemaTypeOne"),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"foo", prop_foo),
+ EqualsJoinablePropertyMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"bar",
+ prop_bar)))));
+ // Check "SchemaTypeTwo"
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList("SchemaTypeTwo"),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"baz", prop_baz)))));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest, TooManyPropertiesShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaType", 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+  // Add kTotalNumJoinableProperties joinable properties.
+ for (int i = 0; i < kTotalNumJoinableProperties; i++) {
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"property" + std::to_string(i)));
+ }
+
+ // Add another joinable property. This should fail.
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ EXPECT_THAT(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"propertyExceed"),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest, InvalidSchemaTypeIdShouldFail) {
+  // Create a valid schema type mapper, then try to process a property config
+  // with an invalid (negative) schema type id. This should fail.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaType", 0));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/-1, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest,
+ SchemaTypeIdInconsistentWithSchemaTypeMapperSizeShouldFail) {
+  // Create a schema type mapper that contains schema type id = 2 even though
+  // the mapper holds only 2 entries. Since JoinablePropertyManager::Builder
+  // expects the 2 schema type ids to be [0, 1], processing a property with
+  // schema type id = 2 should fail even though id = 2 is present in the
+  // schema type mapper.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 2));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/2, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerBuilderTest,
+ NonStringPropertiesWithQualifiedIdJoinableConfigShouldNotProcess) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("SchemaTypeTwo", 1));
+
+ // Create non-string properties with QUALIFIED_ID joinable value type.
+ std::vector<PropertyConfigProto> properties = {
+ PropertyConfigBuilder()
+ .SetName("int1")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("int2")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("double1")
+ .SetDataType(TYPE_DOUBLE)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("double2")
+ .SetDataType(TYPE_DOUBLE)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("boolean1")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("boolean2")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("bytes1")
+ .SetDataType(TYPE_BYTES)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("bytes2")
+ .SetDataType(TYPE_BYTES)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("document1")
+ .SetDataTypeDocument(/*schema_type=*/"SchemaTypeTwo",
+ /*index_nested_properties=*/true)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("document2")
+ .SetDataTypeDocument(/*schema_type=*/"SchemaTypeTwo",
+ /*index_nested_properties=*/true)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build()};
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ for (const PropertyConfigProto& property_config : properties) {
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ std::string(property_config.property_name())));
+ }
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(joinable_property_manager->GetMetadataList("SchemaTypeOne"),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+class JoinablePropertyManagerBuilderWithJoinablePropertyTest
+ : public JoinablePropertyManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(JoinablePropertyManagerBuilderWithJoinablePropertyTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following type is considered joinable:
+// - String with QUALIFIED_ID joinable value type
+INSTANTIATE_TEST_SUITE_P(
+ JoinablePropertyManagerBuilderWithJoinablePropertyTest,
+ JoinablePropertyManagerBuilderWithJoinablePropertyTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+                    // An indexable string can be configured as joinable as
+                    // well. For convenience, just test one indexing config.
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+class JoinablePropertyManagerBuilderWithNonJoinablePropertyTest
+ : public JoinablePropertyManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(JoinablePropertyManagerBuilderWithNonJoinablePropertyTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ JoinablePropertyManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager =
+ std::move(builder).Build();
+ EXPECT_THAT(
+ joinable_property_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(IsEmpty())));
+}
+
+// All types without JoinableConfig (i.e. joinable value type = NONE by default)
+// are considered non-joinable. Other mismatching types (e.g. non-string
+// properties with QUALIFIED_ID joinable value type) were tested individually
+// above.
+INSTANTIATE_TEST_SUITE_P(
+ JoinablePropertyManagerBuilderWithNonJoinablePropertyTest,
+ JoinablePropertyManagerBuilderWithNonJoinablePropertyTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ // Indexable but non-joinable string
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager.cc b/icing/schema/joinable-property-manager.cc
new file mode 100644
index 0000000..1606abb
--- /dev/null
+++ b/icing/schema/joinable-property-manager.cc
@@ -0,0 +1,203 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/joinable-property-manager.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/property-util.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to append a new joinable property metadata to the given
+// metadata list wrapper and update its reverse lookup map.
+libtextclassifier3::Status AppendNewJoinablePropertyMetadata(
+ JoinablePropertyManager::JoinablePropertyMetadataListWrapper*
+ metadata_list_wrapper,
+ std::string&& concatenated_path,
+ PropertyConfigProto::DataType::Code data_type,
+ JoinableConfig::ValueType::Code value_type) {
+  // Validates the next joinable property id and makes sure that it equals the
+  // list index, so that any joinable property metadata can later be looked up
+  // by id in O(1).
+ JoinablePropertyId new_id = static_cast<JoinablePropertyId>(
+ metadata_list_wrapper->metadata_list.size());
+ if (!IsJoinablePropertyIdValid(new_id)) {
+ // Max number of joinable properties reached
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Too many properties to be joinable, max "
+ "number of properties allowed: %d",
+ kTotalNumJoinableProperties));
+ }
+
+  // Creates the joinable property metadata and updates the reverse lookup
+  // map.
+ metadata_list_wrapper->metadata_list.push_back(JoinablePropertyMetadata(
+ new_id, data_type, value_type, std::move(concatenated_path)));
+ metadata_list_wrapper->property_path_to_id_map.insert(
+ {metadata_list_wrapper->metadata_list.back().path, new_id});
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+void AppendJoinablePropertyContent(
+ JoinablePropertyMetadata joinable_property_metadata,
+ libtextclassifier3::StatusOr<std::vector<T>>&& joinable_property_content_or,
+ std::vector<JoinableProperty<T>>& joinable_property_out) {
+ if (!joinable_property_content_or.ok()) {
+ return;
+ }
+
+ std::vector<T> joinable_property_content =
+ std::move(joinable_property_content_or).ValueOrDie();
+ if (!joinable_property_content.empty()) {
+ // Adds to result vector if joinable property is found in document
+ joinable_property_out.emplace_back(std::move(joinable_property_metadata),
+ std::move(joinable_property_content));
+ }
+}
+
+} // namespace
+
+libtextclassifier3::Status
+JoinablePropertyManager::Builder::ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path) {
+ if (schema_type_id < 0 ||
+ schema_type_id >=
+ static_cast<int64_t>(joinable_property_metadata_cache_.size())) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (property_config.joinable_config().value_type() ==
+ JoinableConfig::ValueType::QUALIFIED_ID) {
+ ICING_RETURN_IF_ERROR(AppendNewJoinablePropertyMetadata(
+ &joinable_property_metadata_cache_[schema_type_id],
+ std::move(property_path), PropertyConfigProto::DataType::STRING,
+ JoinableConfig::ValueType::QUALIFIED_ID));
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<JoinablePropertyGroup>
+JoinablePropertyManager::ExtractJoinableProperties(
+ const DocumentProto& document) const {
+ ICING_ASSIGN_OR_RETURN(
+ const std::vector<JoinablePropertyMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
+ JoinablePropertyGroup joinable_property_group;
+ for (const JoinablePropertyMetadata& joinable_property_metadata :
+ *metadata_list) {
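+    // Only string properties with QUALIFIED_ID joinable value type are
+    // extracted for now; all other data types fall through to the default
+    // case and are skipped.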
+ switch (joinable_property_metadata.data_type) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (joinable_property_metadata.value_type ==
+ JoinableConfig::ValueType::QUALIFIED_ID) {
+ AppendJoinablePropertyContent(
+ joinable_property_metadata,
+ property_util::ExtractPropertyValuesFromDocument<
+ std::string_view>(document, joinable_property_metadata.path),
+ joinable_property_group.qualified_id_properties);
+ }
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
+ }
+ }
+ return joinable_property_group;
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+JoinablePropertyManager::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id, const std::string& property_path) const {
+ if (schema_type_id < 0 ||
+ schema_type_id >=
+ static_cast<int64_t>(joinable_property_metadata_cache_.size())) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+
+ const auto iter = joinable_property_metadata_cache_[schema_type_id]
+ .property_path_to_id_map.find(property_path);
+ if (iter == joinable_property_metadata_cache_[schema_type_id]
+ .property_path_to_id_map.end()) {
+ return nullptr;
+ }
+
+ JoinablePropertyId joinable_property_id = iter->second;
+ return &joinable_property_metadata_cache_[schema_type_id]
+ .metadata_list[joinable_property_id];
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+JoinablePropertyManager::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const {
+ if (schema_type_id < 0 ||
+ schema_type_id >=
+ static_cast<int64_t>(joinable_property_metadata_cache_.size())) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
+ if (!IsJoinablePropertyIdValid(joinable_property_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Invalid joinable property id %d", joinable_property_id));
+ }
+
+ const std::vector<JoinablePropertyMetadata>& metadata_list =
+ joinable_property_metadata_cache_[schema_type_id].metadata_list;
+ if (joinable_property_id >= metadata_list.size()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Joinable property with id %d doesn't exist in type config id %d",
+ joinable_property_id, schema_type_id));
+ }
+
+ // The index of metadata list is the same as the joinable property id, so we
+ // can use joinable property id as the index.
+ return &metadata_list[joinable_property_id];
+}
+
+libtextclassifier3::StatusOr<const std::vector<JoinablePropertyMetadata>*>
+JoinablePropertyManager::GetMetadataList(
+ const std::string& type_config_name) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper_.Get(type_config_name));
+ return &joinable_property_metadata_cache_.at(schema_type_id).metadata_list;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property-manager.h b/icing/schema/joinable-property-manager.h
new file mode 100644
index 0000000..3ee5963
--- /dev/null
+++ b/icing/schema/joinable-property-manager.h
@@ -0,0 +1,160 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
+#define ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// This class provides joinable-property-related operations. It assigns
+// joinable property ids to properties according to their JoinableConfig and
+// extracts joinable property values from documents.
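+//
+// Instances are created via JoinablePropertyManager::Builder (or, as the unit
+// tests do, via the SchemaTypeManager factory) and are immutable once built.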
+class JoinablePropertyManager {
+ public:
+  // A wrapper struct that contains a vector of joinable property metadata and
+  // a property-path-to-JoinablePropertyId reverse lookup map.
+ struct JoinablePropertyMetadataListWrapper {
+ std::vector<JoinablePropertyMetadata> metadata_list;
+ std::unordered_map<std::string, JoinablePropertyId> property_path_to_id_map;
+ };
+
+  // Builder class to create a JoinablePropertyManager. The builder does not
+  // take ownership of any input components; all inputs must refer to valid
+  // objects that outlive the created JoinablePropertyManager instance.
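+  //
+  // A minimal usage sketch, mirroring the unit tests ("schema_type_mapper"
+  // and "property_config" are assumed to be valid, pre-populated inputs):
+  //
+  //   JoinablePropertyManager::Builder builder(*schema_type_mapper);
+  //   ICING_RETURN_IF_ERROR(builder.ProcessSchemaTypePropertyConfig(
+  //       /*schema_type_id=*/0, property_config, /*property_path=*/"foo"));
+  //   std::unique_ptr<JoinablePropertyManager> manager =
+  //       std::move(builder).Build();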
+ class Builder {
+ public:
+ explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
+ : schema_type_mapper_(schema_type_mapper),
+ joinable_property_metadata_cache_(schema_type_mapper.num_keys()) {}
+
+ // Checks and appends a new JoinablePropertyMetadata for the schema type id
+ // if the given property config is joinable.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
+ // schema_type_mapper_.num_keys() - 1])
+ // - OUT_OF_RANGE_ERROR if # of joinable properties in a single Schema
+ // exceeds the threshold (kTotalNumJoinableProperties)
+ libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path);
+
+ // Builds and returns a JoinablePropertyManager instance.
+ std::unique_ptr<JoinablePropertyManager> Build() && {
+ return std::unique_ptr<JoinablePropertyManager>(
+ new JoinablePropertyManager(
+ schema_type_mapper_,
+ std::move(joinable_property_metadata_cache_)));
+ }
+
+ private:
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own.
+ std::vector<JoinablePropertyMetadataListWrapper>
+ joinable_property_metadata_cache_;
+ };
+
+ JoinablePropertyManager(const JoinablePropertyManager&) = delete;
+ JoinablePropertyManager& operator=(const JoinablePropertyManager&) = delete;
+
+  // Extracts all joinable property contents of different types from the given
+  // document and groups them by joinable value type.
+ // - Joinable properties are sorted by joinable property id in ascending
+ // order.
+ // - Joinable property ids start from 0.
+ // - Joinable properties with empty content won't be returned.
+ //
+ // Returns:
+ // - A JoinablePropertyGroup instance on success
+ // - NOT_FOUND_ERROR if the type config name of document is not present in
+ // schema_type_mapper_
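+  //
+  // Example (illustrative; "manager" and "document" are assumed names):
+  //   ICING_ASSIGN_OR_RETURN(JoinablePropertyGroup group,
+  //                          manager.ExtractJoinableProperties(document));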
+ libtextclassifier3::StatusOr<JoinablePropertyGroup> ExtractJoinableProperties(
+ const DocumentProto& document) const;
+
+  // Returns the JoinablePropertyMetadata associated with property_path for
+  // the schema type with the given SchemaTypeId.
+ //
+ // Returns:
+ // - Valid pointer to JoinablePropertyMetadata on success
+ // - nullptr if property_path doesn't exist (or is not joinable) in the
+ // joinable metadata list of the schema
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid
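+  //
+  // Note that, unlike the id-based overload below, an unknown property_path
+  // yields a nullptr value wrapped in an OK status rather than an error.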
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ const std::string& property_path) const;
+
+  // Returns the JoinablePropertyMetadata associated with the given
+  // JoinablePropertyId for the schema type with the given SchemaTypeId.
+ //
+ // Returns:
+ // - Valid pointer to JoinablePropertyMetadata on success
+ // - INVALID_ARGUMENT_ERROR if schema type id or JoinablePropertyId is
+ // invalid
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ JoinablePropertyId joinable_property_id) const;
+
+ // Returns:
+  //   - On success, the joinable property metadata list for the specified
+  //     type
+ // - NOT_FOUND_ERROR if the type config name is not present in
+ // schema_type_mapper_
+ libtextclassifier3::StatusOr<const std::vector<JoinablePropertyMetadata>*>
+ GetMetadataList(const std::string& type_config_name) const;
+
+ private:
+ explicit JoinablePropertyManager(
+ const KeyMapper<SchemaTypeId>& schema_type_mapper,
+ std::vector<JoinablePropertyMetadataListWrapper>&&
+ joinable_property_metadata_cache)
+ : schema_type_mapper_(schema_type_mapper),
+        joinable_property_metadata_cache_(
+            std::move(joinable_property_metadata_cache)) {}
+
+ // Maps schema types to a densely-assigned unique id.
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own
+
+ // The index of joinable_property_metadata_cache_ corresponds to a schema
+ // type's SchemaTypeId. At that SchemaTypeId index, we store a
+ // JoinablePropertyMetadataListWrapper instance. The metadata list's index
+ // corresponds to a joinable property's JoinablePropertyId. At the
+ // JoinablePropertyId index, we store the JoinablePropertyMetadata of that
+ // joinable property.
+ //
+ // For example, suppose "email" has a SchemaTypeId of 0 and it has a joinable
+ // property called "senderQualifiedId" with a JoinablePropertyId of 1. Then
+ // the "senderQualifiedId" property's JoinablePropertyMetadata will be at
+ // joinable_property_metadata_cache_[0].metadata_list[1], and
+ // joinable_property_metadata_cache_[0]
+ // .property_path_to_id_map["senderQualifiedId"]
+ // will be 1.
+ const std::vector<JoinablePropertyMetadataListWrapper>
+ joinable_property_metadata_cache_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_JOINABLE_PROPERTY_MANAGER_H_
diff --git a/icing/schema/joinable-property-manager_test.cc b/icing/schema/joinable-property-manager_test.cc
new file mode 100644
index 0000000..ceaaa18
--- /dev/null
+++ b/icing/schema/joinable-property-manager_test.cc
@@ -0,0 +1,519 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/joinable-property-manager.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-type-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsNull;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+// type and property names of Email
+static constexpr char kTypeEmail[] = "Email";
+// joinable
+static constexpr char kPropertyReceiverQualifiedId[] = "receiverQualifiedId";
+static constexpr char kPropertySenderQualifiedId[] = "senderQualifiedId";
+// non-joinable
+static constexpr char kPropertyAttachment[] = "attachment";
+static constexpr char kPropertySubject[] = "subject";
+static constexpr char kPropertyText[] = "text";
+static constexpr char kPropertyTimestamp[] = "timestamp";
+
+// type and property names of Conversation
+static constexpr char kTypeConversation[] = "Conversation";
+// joinable
+static constexpr char kPropertyEmails[] = "emails";
+static constexpr char kPropertyGroupQualifiedId[] = "groupQualifiedId";
+// non-joinable
+static constexpr char kPropertyName[] = "name";
+static constexpr char kPropertyNumber[] = "number";
+
+constexpr int64_t kDefaultTimestamp = 1663274901;
+
+PropertyConfigProto CreateSenderQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySenderQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateReceiverQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyReceiverQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(CreateSenderQualifiedIdPropertyConfig())
+ .AddProperty(CreateReceiverQualifiedIdPropertyConfig())
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNumber)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(CreateGroupQualifiedIdPropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+}
+
+class JoinablePropertyManagerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+
+ type_config_map_.emplace(kTypeEmail, CreateEmailTypeConfig());
+ type_config_map_.emplace(kTypeConversation, CreateConversationTypeConfig());
+
+ email_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddStringProperty(kPropertySenderQualifiedId, "pkg$db/ns#Person1")
+ .AddStringProperty(kPropertyReceiverQualifiedId,
+ "pkg$db/ns#Person2")
+ .AddBytesProperty(kPropertyAttachment, "attachment")
+ .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
+ .Build();
+
+ conversation_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, "the conversation")
+ .AddInt64Property(kPropertyNumber, 2)
+ .AddDocumentProperty(kPropertyEmails,
+ DocumentProto(email_document_))
+ .AddStringProperty(kPropertyGroupQualifiedId,
+ "pkg$db/ns#GroupQualifiedId1")
+ .Build();
+
+    // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+    // one 128KiB so the total DynamicTrieKeyMapper should get 384KiB.
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+ }
+
+ void TearDown() override {
+ schema_type_mapper_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+ SchemaUtil::TypeConfigMap type_config_map_;
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+ DocumentProto email_document_;
+ DocumentProto conversation_document_;
+};
+
+TEST_F(JoinablePropertyManagerTest, ExtractJoinableProperties) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+  // Extracts all joinable properties from the 'Email' document.
+ ICING_ASSERT_OK_AND_ASSIGN(JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(email_document_));
+
+ // Qualified Id joinable properties
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(2));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person2"));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[1].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].values,
+ ElementsAre("pkg$db/ns#Person1"));
+}
+
+TEST_F(JoinablePropertyManagerTest, ExtractJoinablePropertiesNested) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+  // Extracts all joinable properties from the 'Conversation' document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(conversation_document_));
+
+ // Qualified Id joinable properties
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(3));
+
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person2"));
+
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[1].values,
+ ElementsAre("pkg$db/ns#Person1"));
+
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[2].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[2].values,
+ ElementsAre("pkg$db/ns#GroupQualifiedId1"));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ ExtractJoinablePropertiesShouldIgnoreEmptyContents) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Create an email document without receiverQualifiedId.
+ DocumentProto another_email_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddBytesProperty(kPropertyAttachment, "attachment")
+ .AddStringProperty(kPropertySenderQualifiedId, "pkg$db/ns#Person1")
+ .AddInt64Property(kPropertyTimestamp, kDefaultTimestamp)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ JoinablePropertyGroup joinable_property_group,
+ schema_type_manager->joinable_property_manager()
+ .ExtractJoinableProperties(another_email_document));
+
+  // ExtractJoinableProperties should ignore receiverQualifiedId and not
+  // append a JoinableProperty instance for it to the vector.
+ EXPECT_THAT(joinable_property_group.qualified_id_properties, SizeIs(1));
+ EXPECT_THAT(
+ joinable_property_group.qualified_id_properties[0].metadata,
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()));
+ EXPECT_THAT(joinable_property_group.qualified_id_properties[0].values,
+ ElementsAre("pkg$db/ns#Person1"));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadata) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/0),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/1),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/0),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/1),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/2),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()))));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(2));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/-1,
+ /*joinable_property_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/2,
+ /*joinable_property_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataInvalidJoinablePropertyId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*joinable_property_id=*/2),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ /*joinable_property_id=*/3),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadataByPath) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (joinable property id -> joinable property path):
+ // 0 -> receiverQualifiedId
+ // 1 -> senderQualifiedId
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ "receiverQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ "senderQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+
+ // Conversation (joinable property id -> joinable property path):
+ // 0 -> emails.receiverQualifiedId
+ // 1 -> emails.senderQualifiedId
+ // 2 -> groupQualifiedId
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "emails.receiverQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.receiverQualifiedId",
+ CreateReceiverQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "emails.senderQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "groupQualifiedId"),
+ IsOkAndHolds(Pointee(EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()))));
+}
+
+TEST_F(JoinablePropertyManagerTest,
+ GetJoinablePropertyMetadataByPathInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(2));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/-1,
+ "receiverQualifiedId"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/2,
+ "receiverQualifiedId"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(JoinablePropertyManagerTest, GetJoinablePropertyMetadataByPathNotExist) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/0, "nonExistingPath"),
+ IsOkAndHolds(IsNull()));
+ EXPECT_THAT(schema_type_manager->joinable_property_manager()
+ .GetJoinablePropertyMetadata(/*schema_type_id=*/1,
+ "emails.nonExistingPath"),
+ IsOkAndHolds(IsNull()));
+}
+
+// Note: the success case of GetMetadataList has been tested in
+// JoinablePropertyManagerBuildTest.
+TEST_F(JoinablePropertyManagerTest, GetMetadataListInvalidSchemaTypeName) {
+ // Use SchemaTypeManager factory method to instantiate
+ // JoinablePropertyManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ EXPECT_THAT(schema_type_manager->joinable_property_manager().GetMetadataList(
+ "NonExistingSchemaTypeName"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/joinable-property.h b/icing/schema/joinable-property.h
new file mode 100644
index 0000000..057bb74
--- /dev/null
+++ b/icing/schema/joinable-property.h
@@ -0,0 +1,132 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_JOINABLE_PROPERTY_H_
+#define ICING_SCHEMA_JOINABLE_PROPERTY_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+using JoinablePropertyId = int8_t;
+
+// 6 bits for 64 values.
+inline constexpr int kJoinablePropertyIdBits = 6;
+inline constexpr JoinablePropertyId kTotalNumJoinableProperties =
+ (INT8_C(1) << kJoinablePropertyIdBits);
+inline constexpr JoinablePropertyId kInvalidJoinablePropertyId =
+ kTotalNumJoinableProperties;
+inline constexpr JoinablePropertyId kMaxJoinablePropertyId =
+ kTotalNumJoinableProperties - 1;
+inline constexpr JoinablePropertyId kMinJoinablePropertyId = 0;
+
+constexpr bool IsJoinablePropertyIdValid(
+ JoinablePropertyId joinable_property_id) {
+ return joinable_property_id >= kMinJoinablePropertyId &&
+ joinable_property_id <= kMaxJoinablePropertyId;
+}
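+
+// Illustrative compile-time checks of the id range above (derived from the
+// constants in this file): with 6 id bits, ids 0 through 63 are valid, and
+// kInvalidJoinablePropertyId (64) is not.
+static_assert(IsJoinablePropertyIdValid(kMinJoinablePropertyId),
+              "kMinJoinablePropertyId must be valid");
+static_assert(IsJoinablePropertyIdValid(kMaxJoinablePropertyId),
+              "kMaxJoinablePropertyId must be valid");
+static_assert(!IsJoinablePropertyIdValid(kInvalidJoinablePropertyId),
+              "kInvalidJoinablePropertyId must not be valid");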
+
+static_assert(
+ kJoinablePropertyIdBits < 8 * sizeof(JoinablePropertyId),
+ "Cannot exhaust all bits of JoinablePropertyId since it is a signed "
+ "integer and the most significant bit should be preserved.");
+
+struct JoinablePropertyMetadata {
+  // Dot-joined property names, representing the location of the joinable
+  // property inside a document. E.g. "property1.property2".
+ std::string path;
+
+  // A unique id of the joinable property.
+ JoinablePropertyId id;
+
+  // Data type of this joinable property's values. Currently we only support
+  // STRING.
+ PropertyConfigProto::DataType::Code data_type;
+
+ // How values will be used as a joining matcher.
+ //
+ // JoinableConfig::ValueType::QUALIFIED_ID:
+ // Value in this property is a joinable (string) qualified id. Qualified id
+ // is composed of namespace and uri, and it will be used as the identifier
+ // of the parent document. Note: it is invalid to use this value type with
+ // non-string DataType.
+ JoinableConfig::ValueType::Code value_type;
+
+ explicit JoinablePropertyMetadata(
+ JoinablePropertyId id_in,
+ PropertyConfigProto::DataType::Code data_type_in,
+ JoinableConfig::ValueType::Code value_type_in, std::string&& path_in)
+ : path(std::move(path_in)),
+ id(id_in),
+ data_type(data_type_in),
+ value_type(value_type_in) {}
+
+ JoinablePropertyMetadata(const JoinablePropertyMetadata& other) = default;
+ JoinablePropertyMetadata& operator=(const JoinablePropertyMetadata& other) =
+ default;
+
+ JoinablePropertyMetadata(JoinablePropertyMetadata&& other) = default;
+ JoinablePropertyMetadata& operator=(JoinablePropertyMetadata&& other) =
+ default;
+
+ bool operator==(const JoinablePropertyMetadata& rhs) const {
+ return path == rhs.path && id == rhs.id && data_type == rhs.data_type &&
+ value_type == rhs.value_type;
+ }
+};
+
+// JoinableProperty is an icing internal concept similar to document property
+// values (contents), but with extra metadata. The data type of the values is
+// specified by the template parameter.
+//
+// Current supported data types:
+// - std::string_view (PropertyConfigProto::DataType::STRING)
+template <typename T>
+struct JoinableProperty {
+ JoinablePropertyMetadata metadata;
+ std::vector<T> values;
+
+ explicit JoinableProperty(JoinablePropertyMetadata&& metadata_in,
+ std::vector<T>&& values_in)
+ : metadata(std::move(metadata_in)), values(std::move(values_in)) {}
+
+ PropertyConfigProto::DataType::Code data_type() const {
+ return metadata.data_type;
+ }
+
+ JoinableConfig::ValueType::Code value_type() const {
+ return metadata.value_type;
+ }
+};
+
+// Groups joinable properties by data type. Callers can access the joinable
+// property types they want without going through undesired ones.
+//
+// REQUIRES: the lifecycle of the underlying property must be longer than this
+// object, since we use std::string_view for extracting its string_values.
+struct JoinablePropertyGroup {
+ std::vector<JoinableProperty<std::string_view>> qualified_id_properties;
+};
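+
+// A minimal usage sketch (hypothetical caller code, assuming a populated
+// JoinablePropertyGroup named `group`):
+//
+//   for (const JoinableProperty<std::string_view>& property :
+//        group.qualified_id_properties) {
+//     // Use property.metadata.path, property.metadata.id, property.values.
+//   }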
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_JOINABLE_PROPERTY_H_
diff --git a/icing/schema/property-util.cc b/icing/schema/property-util.cc
new file mode 100644
index 0000000..67ff748
--- /dev/null
+++ b/icing/schema/property-util.cc
@@ -0,0 +1,137 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/property-util.h"
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace property_util {
+
+std::string ConvertToPropertyExprIndexStr(int index) {
+ if (index == kWildcardPropertyIndex) {
+ return "";
+ }
+ return absl_ports::StrCat(kLBracket, std::to_string(index), kRBracket);
+}
+
+std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
+ std::string_view property_path_expr2) {
+ if (property_path_expr1.empty()) {
+ return std::string(property_path_expr2);
+ }
+ if (property_path_expr2.empty()) {
+ return std::string(property_path_expr1);
+ }
+ return absl_ports::StrCat(property_path_expr1, kPropertyPathSeparator,
+ property_path_expr2);
+}
+
+std::vector<std::string_view> SplitPropertyPathExpr(
+ std::string_view property_path_expr) {
+ return absl_ports::StrSplit(property_path_expr, kPropertyPathSeparator);
+}
+
+PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr) {
+ size_t l_bracket = property_name_expr.find(kLBracket);
+ if (l_bracket == std::string_view::npos ||
+ l_bracket >= property_name_expr.length()) {
+ return PropertyInfo(std::string(property_name_expr),
+ kWildcardPropertyIndex);
+ }
+ size_t r_bracket = property_name_expr.find(kRBracket, l_bracket);
+ if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
+ return PropertyInfo(std::string(property_name_expr),
+ kWildcardPropertyIndex);
+ }
+ std::string index_string = std::string(
+ property_name_expr.substr(l_bracket + 1, r_bracket - l_bracket - 1));
+ return PropertyInfo(std::string(property_name_expr.substr(0, l_bracket)),
+ std::stoi(index_string));
+}
+
+std::vector<PropertyInfo> ParsePropertyPathExpr(
+ std::string_view property_path_expr) {
+ std::vector<std::string_view> property_name_exprs =
+ SplitPropertyPathExpr(property_path_expr);
+
+ std::vector<PropertyInfo> property_infos;
+ property_infos.reserve(property_name_exprs.size());
+ for (std::string_view property_name_expr : property_name_exprs) {
+ property_infos.push_back(ParsePropertyNameExpr(property_name_expr));
+ }
+ return property_infos;
+}
+
+bool IsParentPropertyPath(std::string_view property_path_expr1,
+ std::string_view property_path_expr2) {
+ if (property_path_expr2.length() < property_path_expr1.length()) {
+ return false;
+ }
+ if (property_path_expr1 !=
+ property_path_expr2.substr(0, property_path_expr1.length())) {
+ return false;
+ }
+ if (property_path_expr2.length() > property_path_expr1.length() &&
+ property_path_expr2[property_path_expr1.length()] !=
+ kPropertyPathSeparator[0]) {
+ return false;
+ }
+ return true;
+}
+
+const PropertyProto* GetPropertyProto(const DocumentProto& document,
+ std::string_view property_name) {
+ for (const PropertyProto& property : document.properties()) {
+ if (property.name() == property_name) {
+ return &property;
+ }
+ }
+ return nullptr;
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string>>
+ExtractPropertyValues<std::string>(const PropertyProto& property) {
+ return std::vector<std::string>(property.string_values().begin(),
+ property.string_values().end());
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+ExtractPropertyValues<std::string_view>(const PropertyProto& property) {
+ return std::vector<std::string_view>(property.string_values().begin(),
+ property.string_values().end());
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<int64_t>>
+ExtractPropertyValues<int64_t>(const PropertyProto& property) {
+ return std::vector<int64_t>(property.int64_values().begin(),
+ property.int64_values().end());
+}
+
+} // namespace property_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/property-util.h b/icing/schema/property-util.h
new file mode 100644
index 0000000..7557879
--- /dev/null
+++ b/icing/schema/property-util.h
@@ -0,0 +1,212 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
+#define ICING_SCHEMA_PROPERTY_UTIL_H_
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace property_util {
+
+// Definition:
+// - Expr (short for expression): with or without index.
+// - property_name: one level of property name without index. E.g. "abc", "def".
+// - property_name_expr: one level of property name with or without index. E.g.
+// "abc", "abc[0]", "def[1]".
+// - property_path: multiple levels (including one) of property names without
+// indices. E.g. "abc", "abc.def".
+// - property_path_expr: multiple levels (including one) of property name
+// expressions. E.g. "abc", "abc[0]", "abc.def",
+// "abc[0].def", "abc[0].def[1]".
+//
+// Set relationship graph (A -> B: A is a subset of B):
+//
+// property_path -> property_path_expr
+// ^ ^
+// | |
+// property_name -> property_name_expr
+inline constexpr std::string_view kPropertyPathSeparator = ".";
+inline constexpr std::string_view kLBracket = "[";
+inline constexpr std::string_view kRBracket = "]";
+
+inline constexpr int kWildcardPropertyIndex = -1;
+
+struct PropertyInfo {
+ std::string name;
+ int index;
+
+ explicit PropertyInfo(std::string name_in, int index_in)
+ : name(std::move(name_in)), index(index_in) {}
+};
+
+// Converts a property (value) index to string, wrapped by kLBracket and
+// kRBracket.
+//
+// REQUIRES: index should be valid or kWildcardPropertyIndex.
+//
+// Returns:
+// - "" if index is kWildcardPropertyIndex.
+// - kLBracket + std::to_string(index) + kRBracket for all non
+// kWildcardPropertyIndex indices.
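+//
+// For example, ConvertToPropertyExprIndexStr(5) would return "[5]".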
+std::string ConvertToPropertyExprIndexStr(int index);
+
+// Concatenates 2 property path expressions.
+//
+// Returns:
+// - property_path_expr1 + "." + property_path_expr2 if both are not empty.
+// - property_path_expr1 if property_path_expr2 is empty.
+// - property_path_expr2 if property_path_expr1 is empty.
+// - "" if both are empty.
+std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
+ std::string_view property_path_expr2);
+
+// Splits a property path expression into multiple property name expressions.
+//
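+// For example, SplitPropertyPathExpr("abc[0].def") would return {"abc[0]",
+// "def"}; index expressions are kept intact within each name expression.
+//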
+// Returns: a vector of property name expressions.
+std::vector<std::string_view> SplitPropertyPathExpr(
+ std::string_view property_path_expr);
+
+// Parses a property name expression into (property name, property index). If
+// the index expression is missing, then the returned property index will be
+// kWildcardPropertyIndex.
+//
+// Examples:
+// - ParsePropertyNameExpr("foo") will return ("foo",
+// kWildcardPropertyIndex).
+// - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
+//
+// Returns: a PropertyInfo instance.
+PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
+
+// Parses a property path expression into multiple (property name, property
+// index) pairs. It is similar to ParsePropertyNameExpr, except that a property
+// path expression can contain multiple name expressions.
+//
+// Examples:
+// - ParsePropertyPathExpr("foo") will return [("foo",
+// kWildcardPropertyIndex)].
+// - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
+// - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
+// kWildcardPropertyIndex), ("bar", 2)]
+//
+// Returns: a vector of PropertyInfo instances.
+std::vector<PropertyInfo> ParsePropertyPathExpr(
+ std::string_view property_path_expr);
+
+// A property path property_path_expr1 is considered a parent of another
+// property path property_path_expr2 if:
+// 1. property_path_expr2 == property_path_expr1, OR
+// 2. property_path_expr2 consists of the entire path of property_path_expr1
+// + "." + [some other property path].
+//
+// Note that this can only be used for property name strings that do not
+// contain the property index.
+//
+// Examples:
+// - IsParentPropertyPath("foo", "foo") will return true.
+// - IsParentPropertyPath("foo", "foo.bar") will return true.
+// - IsParentPropertyPath("foo", "bar.foo") will return false.
+// - IsParentPropertyPath("foo.bar", "foo.foo.bar") will return false.
+//
+// Returns: true if property_path_expr1 is a parent property path of
+// property_path_expr2.
+bool IsParentPropertyPath(std::string_view property_path_expr1,
+ std::string_view property_path_expr2);
+
+// Gets the desired PropertyProto from the document by the given property name.
+// Since the input parameter is property name, this function only deals with
+// the first level of properties in the document and cannot deal with nested
+// documents.
+//
+// Returns:
+//   - const PropertyProto* if the property name exists in the document.
+//   - nullptr if the property name is not found.
+const PropertyProto* GetPropertyProto(const DocumentProto& document,
+ std::string_view property_name);
+
+template <typename T>
+libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
+ const PropertyProto& property) {
+ return absl_ports::UnimplementedError(
+ "Unimplemented template type for ExtractPropertyValues");
+}
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string>>
+ExtractPropertyValues<std::string>(const PropertyProto& property);
+
+template <>
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+ExtractPropertyValues<std::string_view>(const PropertyProto& property);
+
+template <>
+libtextclassifier3::StatusOr<std::vector<int64_t>>
+ExtractPropertyValues<int64_t>(const PropertyProto& property);
+
+template <typename T>
+libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
+ const DocumentProto& document, std::string_view property_path) {
+  // Finds the first property name in property_path.
+ size_t separator_position = property_path.find(kPropertyPathSeparator);
+ std::string_view current_property_name =
+ (separator_position == std::string::npos)
+ ? property_path
+ : property_path.substr(0, separator_position);
+
+ const PropertyProto* property_proto =
+ GetPropertyProto(document, current_property_name);
+ if (property_proto == nullptr) {
+    // Property name not found. It could be one of the following 2 cases:
+    // 1. The property is optional and it's not in the document.
+    // 2. The property name is invalid.
+ return std::vector<T>();
+ }
+
+ if (separator_position == std::string::npos) {
+ // Current property name is the last one in property path.
+ return ExtractPropertyValues<T>(*property_proto);
+ }
+
+  // Extracts property values recursively.
+ std::string_view sub_property_path =
+ property_path.substr(separator_position + 1);
+ std::vector<T> nested_document_content;
+ for (const DocumentProto& nested_document :
+ property_proto->document_values()) {
+ auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
+ sub_property_path);
+ if (content_or.ok()) {
+ std::vector<T> content = std::move(content_or).ValueOrDie();
+ std::move(content.begin(), content.end(),
+ std::back_inserter(nested_document_content));
+ }
+ }
+ return nested_document_content;
+}
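+
+// A minimal usage sketch for ExtractPropertyValuesFromDocument (the property
+// path "sender.name" here is hypothetical): values from all nested documents
+// matching the path are concatenated in document order.
+//
+//   libtextclassifier3::StatusOr<std::vector<std::string_view>> values_or =
+//       ExtractPropertyValuesFromDocument<std::string_view>(document,
+//                                                           "sender.name");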
+
+} // namespace property_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_PROPERTY_UTIL_H_
diff --git a/icing/schema/property-util_test.cc b/icing/schema/property-util_test.cc
new file mode 100644
index 0000000..eddcc84
--- /dev/null
+++ b/icing/schema/property-util_test.cc
@@ -0,0 +1,253 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/property-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+static constexpr std::string_view kTypeTest = "Test";
+static constexpr std::string_view kPropertySingleString = "singleString";
+static constexpr std::string_view kPropertyRepeatedString = "repeatedString";
+static constexpr std::string_view kPropertySingleInteger = "singleInteger";
+static constexpr std::string_view kPropertyRepeatedInteger = "repeatedInteger";
+
+static constexpr std::string_view kTypeNestedTest = "NestedTest";
+static constexpr std::string_view kPropertyStr = "str";
+static constexpr std::string_view kPropertyNestedDocument = "nestedDocument";
+
+TEST(PropertyUtilTest, IsParentPropertyPath) {
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo.bar"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo.bar.foo"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo", "foo.foo.bar"));
+ EXPECT_TRUE(property_util::IsParentPropertyPath("foo.bar", "foo.bar.foo"));
+
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo", "foofoo.bar"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar", "foo.foo.bar"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar", "foofoo.bar"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar.foo", "foo"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo.bar.foo", "foo.bar"));
+ EXPECT_FALSE(
+ property_util::IsParentPropertyPath("foo.foo.bar", "foo.bar.foo"));
+ EXPECT_FALSE(property_util::IsParentPropertyPath("foo", "foo#bar.foo"));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeString) {
+ PropertyProto property;
+ property.mutable_string_values()->Add("Hello, world");
+ property.mutable_string_values()->Add("Foo");
+ property.mutable_string_values()->Add("Bar");
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string>(property),
+ IsOkAndHolds(ElementsAre("Hello, world", "Foo", "Bar")));
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(ElementsAre("Hello, world", "Foo", "Bar")));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeInteger) {
+ PropertyProto property;
+ property.mutable_int64_values()->Add(123);
+ property.mutable_int64_values()->Add(-456);
+ property.mutable_int64_values()->Add(0);
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<int64_t>(property),
+ IsOkAndHolds(ElementsAre(123, -456, 0)));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesMismatchedType) {
+ PropertyProto property;
+ property.mutable_int64_values()->Add(123);
+ property.mutable_int64_values()->Add(-456);
+ property.mutable_int64_values()->Add(0);
+
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesEmpty) {
+ PropertyProto property;
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string>(property),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(property_util::ExtractPropertyValues<std::string_view>(property),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(property_util::ExtractPropertyValues<int64_t>(property),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesTypeUnimplemented) {
+ PropertyProto property;
+ EXPECT_THAT(property_util::ExtractPropertyValues<int32_t>(property),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocument) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+
+ // Single string
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/kPropertySingleString),
+ IsOkAndHolds(ElementsAre("single")));
+ // Repeated string
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/kPropertyRepeatedString),
+ IsOkAndHolds(ElementsAre("repeated1", "repeated2", "repeated3")));
+ // Single integer
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, /*property_path=*/kPropertySingleInteger),
+ IsOkAndHolds(ElementsAre(123)));
+ // Repeated integer
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, /*property_path=*/kPropertyRepeatedInteger),
+ IsOkAndHolds(ElementsAre(1, 2, 3)));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentNested) {
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested/1")
+ .SetSchema(std::string(kTypeNestedTest))
+ .AddStringProperty(std::string(kPropertyStr), "a", "b", "c")
+ .AddDocumentProperty(
+ std::string(kPropertyNestedDocument),
+ DocumentBuilder()
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString),
+ "single1")
+ .AddStringProperty(std::string(kPropertyRepeatedString),
+ "repeated1", "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2,
+ 3)
+ .Build(),
+ DocumentBuilder()
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString),
+ "single2")
+ .AddStringProperty(std::string(kPropertyRepeatedString),
+ "repeated4", "repeated5", "repeated6")
+ .AddInt64Property(std::string(kPropertySingleInteger), 456)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 4, 5,
+ 6)
+ .Build())
+ .Build();
+
+  // Since there are 2 nested documents, all of the values at the leaves will
+  // be returned.
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.singleString"),
+ IsOkAndHolds(ElementsAre("single1", "single2")));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.repeatedString"),
+ IsOkAndHolds(ElementsAre("repeated1", "repeated2", "repeated3",
+ "repeated4", "repeated5", "repeated6")));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ nested_document, /*property_path=*/"nestedDocument.singleInteger"),
+ IsOkAndHolds(ElementsAre(123, 456)));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ nested_document, /*property_path=*/"nestedDocument.repeatedInteger"),
+ IsOkAndHolds(ElementsAre(1, 2, 3, 4, 5, 6)));
+
+  // Test the property at the first level.
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, kPropertyStr),
+ IsOkAndHolds(ElementsAre("a", "b", "c")));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentNonExistingPaths) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, /*property_path=*/"invalid"),
+ IsOkAndHolds(IsEmpty()));
+
+ DocumentProto nested_document =
+ DocumentBuilder()
+ .SetKey("icing", "nested/1")
+ .SetSchema(std::string(kTypeNestedTest))
+ .AddStringProperty(std::string(kPropertyStr), "a", "b", "c")
+ .AddDocumentProperty(std::string(kPropertyNestedDocument),
+ DocumentProto(document), DocumentProto(document))
+ .Build();
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/kPropertySingleString),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ nested_document, /*property_path=*/"nestedDocument.invalid"),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PropertyUtilTest, ExtractPropertyValuesFromDocumentTypeUnimplemented) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "test/1")
+ .SetSchema(std::string(kTypeTest))
+ .AddStringProperty(std::string(kPropertySingleString), "single")
+ .AddStringProperty(std::string(kPropertyRepeatedString), "repeated1",
+ "repeated2", "repeated3")
+ .AddInt64Property(std::string(kPropertySingleInteger), 123)
+ .AddInt64Property(std::string(kPropertyRepeatedInteger), 1, 2, 3)
+ .Build();
+ EXPECT_THAT(property_util::ExtractPropertyValuesFromDocument<int32_t>(
+ document, /*property_path=*/kPropertySingleString),
+ StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.cc b/icing/schema/schema-property-iterator.cc
new file mode 100644
index 0000000..8fc245c
--- /dev/null
+++ b/icing/schema/schema-property-iterator.cc
@@ -0,0 +1,198 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-property-iterator.h"
+
+#include <algorithm>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/property-util.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status SchemaPropertyIterator::Advance() {
+ while (!levels_.empty()) {
+ if (!levels_.back().Advance()) {
+ // When finishing iterating all properties of the current level, pop it
+ // from the stack (levels_), return to the previous level and resume the
+ // iteration.
+ parent_type_config_names_.erase(
+ parent_type_config_names_.find(levels_.back().GetSchemaTypeName()));
+ levels_.pop_back();
+ continue;
+ }
+
+ const PropertyConfigProto& curr_property_config =
+ levels_.back().GetCurrentPropertyConfig();
+ std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
+
+ // Iterate through the sorted_top_level_indexable_nested_properties_ in
+ // order until we find the first element that is >= curr_property_path.
+ while (current_top_level_indexable_nested_properties_idx_ <
+ sorted_top_level_indexable_nested_properties_.size() &&
+ sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_) <
+ curr_property_path) {
+ // If an element in sorted_top_level_indexable_nested_properties_ < the
+ // current property path, it means that we've already iterated past the
+ // possible position for it without seeing it.
+ // It's not a valid property path in our schema definition. Add it to
+      // unknown_indexable_nested_property_paths_ and advance
+ // current_top_level_indexable_nested_properties_idx_.
+ unknown_indexable_nested_property_paths_.push_back(
+ sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_));
+ ++current_top_level_indexable_nested_properties_idx_;
+ }
+
+ if (curr_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // We've advanced to a leaf property.
+ // Set whether this property is indexable according to its level's
+ // indexable config. If this property is declared in
+ // indexable_nested_properties_list of the top-level schema, it is also
+ // nested indexable.
+ std::string* current_indexable_nested_prop =
+ current_top_level_indexable_nested_properties_idx_ <
+ sorted_top_level_indexable_nested_properties_.size()
+ ? &sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_)
+ : nullptr;
+ if (current_indexable_nested_prop == nullptr ||
+ *current_indexable_nested_prop > curr_property_path) {
+ // Current property is not in the indexable list. Set it as indexable if
+ // its schema level is indexable AND it is an indexable property.
+ bool is_property_indexable =
+ levels_.back().GetLevelNestedIndexable() &&
+ SchemaUtil::IsIndexedProperty(curr_property_config);
+ levels_.back().SetCurrentPropertyIndexable(is_property_indexable);
+ } else if (*current_indexable_nested_prop == curr_property_path) {
+ // Current property is in the indexable list. Set its indexable config
+ // to true. This property will consume a sectionId regardless of whether
+ // or not it is actually indexable.
+ levels_.back().SetCurrentPropertyIndexable(true);
+ ++current_top_level_indexable_nested_properties_idx_;
+ }
+ return libtextclassifier3::Status::OK;
+ }
+
+    // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
+    //   schema and we need to traverse the next level. Look up the
+    //   SchemaTypeConfig (by the schema name) in type_config_map_, and push a
+    //   new level into levels_.
+    // - Each level has to record the index of the property it is currently at,
+    //   so we can resume the iteration when returning to it. Other essential
+    //   info is maintained in LevelInfo as well.
+ auto nested_type_config_iter =
+ type_config_map_.find(curr_property_config.schema_type());
+ if (nested_type_config_iter == type_config_map_.end()) {
+ // This should never happen because our schema should already be
+ // validated by this point.
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Type config not found: ", curr_property_config.schema_type()));
+ }
+ const SchemaTypeConfigProto& nested_type_config =
+ nested_type_config_iter->second;
+
+ if (levels_.back().GetLevelNestedIndexable()) {
+ // We should set sorted_top_level_indexable_nested_properties_ to the list
+ // defined by the current level.
+ // GetLevelNestedIndexable() is true either because:
+ // 1. We're looking at a document property of the top-level schema --
+ // The first LevelInfo for the iterator is initialized with
+ // all_nested_properties_indexable_ = true.
+ // 2. All previous levels set index_nested_properties = true:
+      //    This indicates that upper-level schema types want to follow the
+      //    nested property definitions of their document subtypes. If this is
+      //    the first subtype level that defines a list, we should set it as
+      //    sorted_top_level_indexable_nested_properties_ for the current
+      //    top-level schema.
+ sorted_top_level_indexable_nested_properties_.clear();
+ sorted_top_level_indexable_nested_properties_.reserve(
+ curr_property_config.document_indexing_config()
+ .indexable_nested_properties_list()
+ .size());
+ for (const std::string& property :
+ curr_property_config.document_indexing_config()
+ .indexable_nested_properties_list()) {
+ // Concat the current property name to each property to get the full
+ // property path expression for each indexable nested property.
+ sorted_top_level_indexable_nested_properties_.push_back(
+ property_util::ConcatenatePropertyPathExpr(curr_property_path,
+ property));
+ }
+ current_top_level_indexable_nested_properties_idx_ = 0;
+      // Sort elements and dedupe.
+ std::sort(sorted_top_level_indexable_nested_properties_.begin(),
+ sorted_top_level_indexable_nested_properties_.end());
+ auto last =
+ std::unique(sorted_top_level_indexable_nested_properties_.begin(),
+ sorted_top_level_indexable_nested_properties_.end());
+ sorted_top_level_indexable_nested_properties_.erase(
+ last, sorted_top_level_indexable_nested_properties_.end());
+ }
+
+ bool is_cycle =
+ parent_type_config_names_.find(nested_type_config.schema_type()) !=
+ parent_type_config_names_.end();
+ bool is_parent_property_path =
+ current_top_level_indexable_nested_properties_idx_ <
+ sorted_top_level_indexable_nested_properties_.size() &&
+ property_util::IsParentPropertyPath(
+ curr_property_path,
+ sorted_top_level_indexable_nested_properties_.at(
+ current_top_level_indexable_nested_properties_idx_));
+ if (is_cycle && !is_parent_property_path) {
+ // Cycle detected. The schema definition is guaranteed to be valid here
+ // since it must have already been validated during SchemaUtil::Validate,
+ // which would have rejected any schema with bad cycles.
+ //
+ // There are no properties in the indexable_nested_properties_list that
+ // are a part of this circular reference.
+ // We do not need to iterate this type further so we simply move on to
+ // other properties in the parent type.
+ continue;
+ }
+
+ bool all_nested_properties_indexable =
+ levels_.back().GetLevelNestedIndexable() &&
+ curr_property_config.document_indexing_config()
+ .index_nested_properties();
+ levels_.push_back(LevelInfo(nested_type_config,
+ std::move(curr_property_path),
+ all_nested_properties_indexable));
+ parent_type_config_names_.insert(nested_type_config.schema_type());
+ }
+
+  // Before returning, move all remaining, not-yet-iterated property paths from
+  // sorted_top_level_indexable_nested_properties_ into
+  // unknown_indexable_nested_property_paths_.
+ std::move(sorted_top_level_indexable_nested_properties_.begin() +
+ current_top_level_indexable_nested_properties_idx_,
+ sorted_top_level_indexable_nested_properties_.end(),
+ std::back_inserter(unknown_indexable_nested_property_paths_));
+
+ return absl_ports::OutOfRangeError("End of iterator");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.h b/icing/schema/schema-property-iterator.h
new file mode 100644
index 0000000..66b8f32
--- /dev/null
+++ b/icing/schema/schema-property-iterator.h
@@ -0,0 +1,222 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
+#define ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-util.h"
+
+namespace icing {
+namespace lib {
+
+// SchemaPropertyIterator: a class for iterating through all properties of a
+// given SchemaTypeConfigProto in lexicographical order. Only leaf
+// (non-document-type) properties will be returned, and for document type
+// properties, the iterator will traverse down to the next nested level of
+// schema.
+//
+// REQUIRED: The schema in which this SchemaTypeConfigProto is defined must have
+// already passed the validation step during SetSchema.
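+//
+// A minimal usage sketch (hypothetical caller code): advance until
+// OUT_OF_RANGE, treating any other non-OK status as an error.
+//
+//   SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+//   libtextclassifier3::Status status = iterator.Advance();
+//   while (status.ok()) {
+//     const PropertyConfigProto& property =
+//         iterator.GetCurrentPropertyConfig();
+//     std::string property_path = iterator.GetCurrentPropertyPath();
+//     bool indexable = iterator.GetCurrentPropertyIndexable();
+//     // ... use property, property_path, and indexable ...
+//     status = iterator.Advance();
+//   }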
+class SchemaPropertyIterator {
+ public:
+ explicit SchemaPropertyIterator(
+ const SchemaTypeConfigProto& base_schema_type_config,
+ const SchemaUtil::TypeConfigMap& type_config_map)
+ : type_config_map_(type_config_map) {
+ levels_.push_back(LevelInfo(base_schema_type_config,
+ /*base_property_path=*/"",
+ /*all_nested_properties_indexable=*/true));
+ parent_type_config_names_.insert(base_schema_type_config.schema_type());
+ }
+
+ // Gets the current property config.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ const PropertyConfigProto& GetCurrentPropertyConfig() const {
+ return levels_.back().GetCurrentPropertyConfig();
+ }
+
+ // Gets the current property path.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ std::string GetCurrentPropertyPath() const {
+ return levels_.back().GetCurrentPropertyPath();
+ }
+
+ // Returns whether the current property is indexable. This would be true if
+ // either the current level is nested indexable, or if the current property is
+ // declared indexable in the indexable_nested_properties_list of the top-level
+ // schema type.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ bool GetCurrentPropertyIndexable() const {
+ return levels_.back().GetCurrentPropertyIndexable();
+ }
+
+ // Returns whether the current schema level is nested indexable. If this is
+ // true, all properties in the level are indexed.
+ //
+ // REQUIRES: The preceding call for Advance() is OK.
+ bool GetLevelNestedIndexable() const {
+ return levels_.back().GetLevelNestedIndexable();
+ }
+
+ // The set of indexable nested properties that are defined in the
+ // indexable_nested_properties_list but are not found in the schema
+ // definition. These properties still consume sectionIds, but will not be
+ // indexed.
+ const std::vector<std::string>& unknown_indexable_nested_property_paths()
+ const {
+ return unknown_indexable_nested_property_paths_;
+ }
+
+ // Advances to the next leaf property.
+ //
+ // Returns:
+ // - OK on success
+ // - OUT_OF_RANGE_ERROR if there is no more leaf property
+  //   - NOT_FOUND_ERROR if any nested schema name is not found in
+  //     type_config_map
+  //
+  // Note: cycles in the nested schema are skipped rather than reported, since
+  // the schema must have already passed validation during SetSchema.
+ libtextclassifier3::Status Advance();
+
+ private:
+  // An inner class for maintaining the iteration state of a (nested) level.
+  // Nested SchemaTypeConfigs form a tree structure, so we have to traverse it
+  // recursively to reach all leaf properties.
+ class LevelInfo {
+ public:
+ explicit LevelInfo(const SchemaTypeConfigProto& schema_type_config,
+ std::string base_property_path,
+ bool all_nested_properties_indexable)
+ : schema_type_config_(schema_type_config),
+ base_property_path_(std::move(base_property_path)),
+ sorted_property_indices_(schema_type_config.properties_size()),
+ current_vec_idx_(-1),
+ sorted_property_indexable_(schema_type_config.properties_size()),
+ all_nested_properties_indexable_(all_nested_properties_indexable) {
+      // Index-sort properties by the lexicographical order of their names.
+ std::iota(sorted_property_indices_.begin(),
+ sorted_property_indices_.end(),
+ /*value=*/0);
+ std::sort(
+ sorted_property_indices_.begin(), sorted_property_indices_.end(),
+ [&schema_type_config](int lhs_idx, int rhs_idx) -> bool {
+ return schema_type_config.properties(lhs_idx).property_name() <
+ schema_type_config.properties(rhs_idx).property_name();
+ });
+ }
+
+ bool Advance() {
+ return ++current_vec_idx_ < sorted_property_indices_.size();
+ }
+
+ const PropertyConfigProto& GetCurrentPropertyConfig() const {
+ return schema_type_config_.properties(
+ sorted_property_indices_[current_vec_idx_]);
+ }
+
+ std::string GetCurrentPropertyPath() const {
+ return property_util::ConcatenatePropertyPathExpr(
+ base_property_path_, GetCurrentPropertyConfig().property_name());
+ }
+
+ bool GetLevelNestedIndexable() const {
+ return all_nested_properties_indexable_;
+ }
+
+ bool GetCurrentPropertyIndexable() const {
+ return sorted_property_indexable_[current_vec_idx_];
+ }
+
+ void SetCurrentPropertyIndexable(bool indexable) {
+ sorted_property_indexable_[current_vec_idx_] = indexable;
+ }
+
+ std::string_view GetSchemaTypeName() const {
+ return schema_type_config_.schema_type();
+ }
+
+ private:
+ const SchemaTypeConfigProto& schema_type_config_; // Does not own
+
+ // Concatenated property path of all parent levels.
+ std::string base_property_path_;
+
+    // We perform an index sort (comparing property names) in order to iterate
+    // all leaf properties in lexicographical order. This vector stores those
+    // sorted indices.
+ std::vector<int> sorted_property_indices_;
+ int current_vec_idx_;
+
+ // Vector indicating whether each property in the current level is
+ // indexable. We can declare different indexable settings for properties in
+ // the same level using indexable_nested_properties_list.
+ //
+ // Element indices in this vector correspond to property indices in the
+ // sorted order.
+ std::vector<bool> sorted_property_indexable_;
+
+    // Indicates whether all properties in the current level are nested
+    // indexable. This is true for a level if its parent document type property
+    // declares index_nested_properties=true. If any parent document type
+    // property sets its flag to false, then this is false for all of its child
+    // properties.
+ bool all_nested_properties_indexable_;
+ };
+
+ const SchemaUtil::TypeConfigMap& type_config_map_; // Does not own
+
+  // Maintains the stack for the recursive traversal of nested schema types. We
+  // use std::vector instead of std::stack to avoid allocating and freeing
+  // memory too frequently.
+ std::vector<LevelInfo> levels_;
+
+  // Maintains all traversed parent schema type config names in the current
+  // stack (levels_). It is used to detect cycle dependencies in nested
+  // schemas.
+ std::unordered_multiset<std::string_view> parent_type_config_names_;
+
+ // Sorted list of indexable nested properties for the top-level schema.
+ std::vector<std::string> sorted_top_level_indexable_nested_properties_;
+
+  // Current iteration index into the
+  // sorted_top_level_indexable_nested_properties_ list.
+ int current_top_level_indexable_nested_properties_idx_ = 0;
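+
+  // Since both the visited property paths and the list above are iterated in
+  // sorted order, membership checks can presumably be done as a single-pass
+  // two-pointer merge. A sketch of that matching step (illustrative only; the
+  // actual logic lives in Advance(), and current_property_path is a
+  // hypothetical local):
+  //
+  //   const auto& list = sorted_top_level_indexable_nested_properties_;
+  //   int& idx = current_top_level_indexable_nested_properties_idx_;
+  //   while (idx < list.size() && list[idx] < current_property_path) ++idx;
+  //   bool in_list = idx < list.size() && list[idx] == current_property_path;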
+
+ // Vector of indexable nested properties defined in the
+ // indexable_nested_properties_list, but not found in the schema definition.
+ // These properties still consume sectionIds, but will not be indexed.
+ // Properties are inserted into this vector in sorted order.
+ //
+ // TODO(b/289152024): Implement support for indexing these properties if they
+ // are in the child types of polymorphic nested properties.
+ std::vector<std::string> unknown_indexable_nested_property_paths_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
diff --git a/icing/schema/schema-property-iterator_test.cc b/icing/schema/schema-property-iterator_test.cc
new file mode 100644
index 0000000..2b0226d
--- /dev/null
+++ b/icing/schema/schema-property-iterator_test.cc
@@ -0,0 +1,3905 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-property-iterator.h"
+
+#include <initializer_list>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-util.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+TEST(SchemaPropertyIteratorTest,
+ SingleLevelSchemaTypeConfigShouldIterateInCorrectOrder) {
+ std::string schema_type_name = "Schema";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ NestedSchemaTypeConfigShouldIterateInCorrectOrder) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("World").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Icing").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // SchemaThree: {
+ // "Hello": TYPE_STRING,
+ // "World": TYPE_DOCUMENT SchemaOne {
+ // "Google": TYPE_STRING,
+ // "Youtube": TYPE_BYTES,
+ // "Alphabet": TYPE_INT64,
+ // },
+ // "Icing": TYPE_DOCUMENT SchemaTwo {
+ // "Foo": TYPE_STRING,
+ // "Bar": TYPE_DOCUMENT SchemaOne {
+ // "Google": TYPE_STRING,
+ // "Youtube": TYPE_BYTES,
+ // "Alphabet": TYPE_INT64,
+ // },
+ // },
+ // }
+ SchemaPropertyIterator iterator(schema_type_config3, type_config_map);
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Bar.Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Icing.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Alphabet"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World.Youtube"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ NonExistingNestedSchemaTypeConfigShouldGetNotFoundError) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(PropertyConfigBuilder().SetName("Google").SetDataType(
+ TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder().SetName("Youtube").SetDataType(
+ TYPE_BYTES))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Alphabet")
+ .SetDataType(TYPE_INT64))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .Build();
+  // Omit the nested type (schema_type_config1) from type_config_map.
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name2, schema_type_config2}};
+
+ SchemaPropertyIterator iterator(schema_type_config2, type_config_map);
+ // Since Foo is a document type property with schema type = "SchemaOne" and
+ // "SchemaOne" is not in type_config_map, Advance() should return NOT_FOUND
+ // error.
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ SchemaTypeConfigWithEmptyPropertyShouldGetOutOfRangeErrorAtFirstAdvance) {
+ std::string schema_type_name = "Schema";
+
+ SchemaTypeConfigProto schema_type_config =
+ SchemaTypeConfigBuilder().SetType(schema_type_name).Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name, schema_type_config}};
+
+ SchemaPropertyIterator iterator(schema_type_config, type_config_map);
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, NestedIndexable) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Google").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
+ schema_type_name1,
+ /*index_nested_properties=*/false))
+ .AddProperty(PropertyConfigBuilder().SetName("Foo").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz1").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz2").SetDataTypeDocument(
+ schema_type_name2, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz3").SetDataTypeDocument(
+ schema_type_name3, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Baz4").SetDataTypeDocument(
+ schema_type_name3, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello1").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("Hello2").SetDataTypeDocument(
+ schema_type_name1, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("World").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // SchemaFour: {
+ // "Baz1": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaTwo {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz2": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaTwo {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz3": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=true SchemaThree {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Baz4": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaThree {
+ // "Bar": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "Foo": TYPE_STRING INDEXABLE,
+ // },
+ // "Hello": TYPE_DOCUMENT INDEX_NESTED_PROPERTIES=false SchemaOne {
+ // "Google": TYPE_STRING INDEXABLE,
+ // },
+ // "World": TYPE_STRING INDEXABLE,
+ // }
+ SchemaPropertyIterator iterator(schema_type_config4, type_config_map);
+
+  // Baz1 to Baz4: 2 levels of nested document type properties.
+ // For Baz1, all levels set index_nested_properties = true, so all leaf
+ // properties should be nested indexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz1.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz1.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ // For Baz2, the parent level sets index_nested_properties = false, so all
+ // leaf properties in child levels should be nested unindexable even if
+ // they've set their index_nested_properties = true.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz2.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz2.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ // For Baz3, the parent level sets index_nested_properties = true, but the
+ // child level sets index_nested_properties = false.
+ // - Leaf properties in the parent level should be nested indexable.
+ // - Leaf properties in the child level should be nested unindexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz3.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz3.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ // For Baz4, all levels set index_nested_properties = false, so all leaf
+ // properties should be nested unindexable.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz4.Bar.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Baz4.Foo"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+  // Verify properties with 1 and 0 levels of document type nesting.
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello1.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("Hello2.Google"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(iterator.Advance(), IsOk());
+ EXPECT_THAT(iterator.GetCurrentPropertyPath(), Eq("World"));
+ EXPECT_THAT(iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config4.properties(6)));
+ EXPECT_THAT(iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(iterator.unknown_indexable_nested_property_paths(), IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_singleNestedLevel) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop3")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema1prop4")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema1prop5")
+ .SetDataType(TYPE_BOOLEAN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/{"schema1prop2",
+ "schema1prop3",
+ "schema1prop5"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema2prop3")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2}};
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop1.schema1prop4",
+ // "schema2prop1.schema1prop5", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop2", "schema2prop1.schema1prop3",
+ // "schema2prop1.schema1prop5", "schema2prop2"}.
+ //
+ // "schema2prop1.schema1prop4" is indexable by its indexing-config, but is not
+ // considered indexable for Schema2 because Schema2 sets its
+ // index_nested_properties config to false, and "schema1prop4" is not
+ // in the indexable_nested_properties_list for schema2prop1.
+ //
+ // "schema2prop1.schema1prop1", "schema2prop1.schema1prop3" and
+ // "schema2prop1.schema1prop5" are non-indexable by its indexing-config.
+ // However "schema2prop1.schema1prop3" and "schema2prop1.schema1prop5" are
+ // indexed as it appears in the indexable_list.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop4"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(3)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop5"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(4)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+  // Iterate through schema1 properties. Schema1 only has non-document-type leaf
+  // properties, so each property is marked indexable or not according to its
+  // own indexing config.
+ SchemaPropertyIterator schema1_iterator(schema_type_config1, type_config_map);
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop1"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop2"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop3"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop4"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(3)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema1_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyPath(), Eq("schema1prop5"));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(4)));
+ EXPECT_THAT(schema1_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema1_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema1_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_indexBooleanTrueDoesNotAffectOtherLevels) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(schema_type_name1,
+ /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop3")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/{"schema1prop1",
+ "schema1prop3"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema2prop2", "schema2prop1.schema1prop1",
+ "schema2prop1.schema1prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop1.schema2prop3", "schema3prop2",
+ // "schema3prop3.schema1prop1", "schema3prop3.schema1prop2",
+ // "schema3prop3.schema1prop3"}.
+ //
+ // Indexable properties:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop2", "schema3prop3.schema1prop1",
+ // "schema3prop3.schema1prop3"}
+ //
+ // Schema2 setting index_nested_properties=true does not affect nested
+ // properties indexing for Schema3.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("schema3prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop2", "schema2prop3"}
+ //
+ // All properties are indexed because index_nested_properties=true for
+ // Schema2.schema2prop1. Schema3's indexable_nested_properties setting does
+ // not affect this.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_indexBooleanFalseDoesNotAffectOtherLevels) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(schema_type_name1,
+ /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{
+ "schema2prop1.schema1prop2"}))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2"}.
+ //
+ // Indexable properties: {"schema3prop1.schema2prop1.schema1prop2"}
+ //
+  // Schema2 setting index_nested_properties=false does not affect Schema3's
+  // indexable list.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2"}
+ //
+ // Indexable properties: None
+ //
+ // The indexable list for Schema3 does not propagate to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_indexableSetDoesNotAffectOtherLevels) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop3")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"schema1prop2"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop3")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/{"schema1prop1",
+ "schema1prop3"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema2prop2", "schema2prop1.schema1prop1",
+ "schema2prop1.schema1prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop1.schema2prop3", "schema3prop2",
+ // "schema3prop3.schema1prop1", "schema3prop3.schema1prop2",
+ // "schema3prop3.schema1prop3"}.
+ //
+ // Indexable properties:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop3",
+ // "schema3prop1.schema2prop2", "schema3prop2", "schema3prop3.schema1prop1",
+ // "schema3prop3.schema1prop3"}
+ //
+ // Schema2 setting indexable_nested_properties_list={schema1prop2} does not
+ // affect nested properties indexing for Schema3.
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("schema3prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop3.schema1prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2",
+ // "schema2prop1.schema1prop3", "schema2prop2", "schema2prop3"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop2", "schema2prop2", "schema2prop3"}
+ //
+  // The indexable_nested_properties_list set for Schema3.schema3prop1 does not
+  // propagate to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("schema2prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(
+ SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_upperLevelIndexTrueIndexesListOfNestedLevel) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"schema1prop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(schema_type_name2,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema4prop1")
+ .SetDataTypeDocument(schema_type_name3,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // Order of iteration for Schema4:
+ // {"schema4prop1.schema3prop1.schema2prop1.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop2"}.
+ //
+ // Indexable properties: {schema4prop1.schema3prop1.schema2prop1.schema1prop2}
+ //
+  // Both Schema4 and Schema3 set index_nested_properties=true, so they both
+  // follow the indexing behavior of their subtype.
+  // Schema2 is the first subtype to define its own indexing config, so we index
+  // its list for both Schema3 and Schema4 even though it sets
+  // index_nested_properties=false.
+ SchemaPropertyIterator schema4_iterator(schema_type_config4, type_config_map);
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema4_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema4_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema3:
+ // {"schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2"}.
+ //
+ // Indexable properties: {schema3prop1.schema2prop1.schema1prop2}
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"schema2prop1.schema1prop1", "schema2prop1.schema1prop2"}
+ //
+ // Indexable properties:
+ // {"schema2prop1.schema1prop2"}
+ //
+ // Schema3 setting index_nested_properties=true does not propagate to Schema2.
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesList_unknownPropPaths) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop2", "schema1prop2.foo",
+ "foo.bar", "zzz", "aaa.zzz"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop1", "schema1prop2", "unknown.path"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema3prop1", "schema2prop1", "schema1prop2",
+ "schema2prop1.schema1prop2", "schema2prop1.zzz", "zzz"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema4prop1")
+ .SetDataTypeDocument(schema_type_name3,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // Order of iteration for Schema4:
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown property paths from schema3 will also be included for schema4,
+ // since schema4 sets index_nested_properties=true.
+ // This includes everything in schema3prop1's list except
+ // "schema2prop1.schema1prop2".
+ SchemaPropertyIterator schema4_iterator(schema_type_config4, type_config_map);
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema4_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema4prop1.schema3prop1.schema1prop2",
+ "schema4prop1.schema3prop1.schema2prop1",
+ "schema4prop1.schema3prop1.schema2prop1.zzz",
+ "schema4prop1.schema3prop1.schema3prop1",
+ "schema4prop1.schema3prop1.zzz"));
+
+ // Order of iteration for Schema3:
+ // "schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema3prop1.schema2prop2.schema1prop1",
+ // "schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown properties (in order):
+ // "schema3prop1.schema1prop2", "schema3prop1.schema2prop1" (not a leaf prop),
+ // "schema3prop1.schema2prop1.zzz", "schema3prop1.schema3prop1",
+ // "schema3prop1.zzz"
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre(
+ "schema3prop1.schema1prop2", "schema3prop1.schema2prop1",
+ "schema3prop1.schema2prop1.zzz", "schema3prop1.schema3prop1",
+ "schema3prop1.zzz"));
+
+ // Order of iteration for Schema2:
+ // "schema2prop1.schema1prop1",
+ // "schema2prop1.schema1prop2" (indexable),
+ // "schema2prop2.schema1prop1" (indexable),
+ // "schema2prop2.schema1prop2" (indexable)
+ //
+ // Unknown properties (in order):
+ // "schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ // "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ // "schema2prop2.unknown.path"
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(
+ schema2_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ "schema2prop2.unknown.path"));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedPropertiesListDuplicateElements) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+ std::string schema_type_name4 = "SchemaFour";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema1prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema2prop1")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop2", "schema1prop2", "schema1prop2.foo",
+ "schema1prop2.foo", "foo.bar", "foo.bar", "foo.bar",
+ "zzz", "zzz", "aaa.zzz", "schema1prop2"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schema2prop2")
+ .SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"schema1prop1", "schema1prop2", "unknown.path",
+ "unknown.path", "unknown.path", "unknown.path",
+ "schema1prop1"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema3prop1")
+ .SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"schema3prop1", "schema3prop1", "schema2prop1",
+ "schema2prop1", "schema1prop2", "schema1prop2",
+ "schema2prop1.schema1prop2", "schema2prop1.schema1prop2",
+ "schema2prop1.zzz", "zzz", "zzz"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config4 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name4)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schema4prop1")
+ .SetDataTypeDocument(schema_type_name3,
+ /*index_nested_properties=*/true))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3},
+ {schema_type_name4, schema_type_config4}};
+
+ // The results of this test case are the same as in the previous test case.
+ // This tests that the indexable_nested_properties_list is deduped correctly.
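+ // For example, schema2prop1's list declares "schema1prop2" three times and
+ // "foo.bar" three times; after deduplication the iterator behaves exactly as
+ // if each path had been listed once, so the indexable and unknown paths
+ // below match the previous test case one-for-one.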
+
+ // Order of iteration for Schema4:
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop1",
+ // "schema4prop1.schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown property paths from schema3 will also be included for schema4,
+ // since schema4 sets index_nested_properties=true.
+ // This includes everything in schema3prop1's list except
+ // "schema2prop1.schema1prop2".
+ SchemaPropertyIterator schema4_iterator(schema_type_config4, type_config_map);
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyPath(),
+ Eq("schema4prop1.schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema4_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema4_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema4_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema4prop1.schema3prop1.schema1prop2",
+ "schema4prop1.schema3prop1.schema2prop1",
+ "schema4prop1.schema3prop1.schema2prop1.zzz",
+ "schema4prop1.schema3prop1.schema3prop1",
+ "schema4prop1.schema3prop1.zzz"));
+
+ // Order of iteration for Schema3:
+ // "schema3prop1.schema2prop1.schema1prop1",
+ // "schema3prop1.schema2prop1.schema1prop2" (indexable),
+ // "schema3prop1.schema2prop2.schema1prop1",
+ // "schema3prop1.schema2prop2.schema1prop2"
+ //
+ // Unknown properties (in order):
+ // "schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ // "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ // "schema2prop2.unknown.path"
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("schema3prop1.schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre(
+ "schema3prop1.schema1prop2", "schema3prop1.schema2prop1",
+ "schema3prop1.schema2prop1.zzz", "schema3prop1.schema3prop1",
+ "schema3prop1.zzz"));
+
+ // Order of iteration for Schema2:
+ // "schema2prop1.schema1prop1",
+ // "schema2prop1.schema1prop2" (indexable),
+ // "schema2prop2.schema1prop1" (indexable),
+ // "schema2prop2.schema1prop2" (indexable)
+ //
+ // Unknown properties (in order):
+ // "schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ // "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ // "schema2prop2.unknown.path"
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop1.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(),
+ Eq("schema2prop2.schema1prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(
+ schema2_iterator.unknown_indexable_nested_property_paths(),
+ testing::ElementsAre("schema2prop1.aaa.zzz", "schema2prop1.foo.bar",
+ "schema2prop1.schema1prop2.foo", "schema2prop1.zzz",
+ "schema2prop2.unknown.path"));
+}
+
+TEST(SchemaPropertyIteratorTest,
+ IndexableNestedProperties_duplicatePropertyNamesInDifferentProperties) {
+ std::string schema_type_name1 = "SchemaOne";
+ std::string schema_type_name2 = "SchemaTwo";
+ std::string schema_type_name3 = "SchemaThree";
+
+ SchemaTypeConfigProto schema_type_config1 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name1)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop1").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop2").SetDataTypeString(
+ TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop3").SetDataTypeString(
+ TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config2 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name2)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop1").SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"prop2"}))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop2").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop3").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config3 =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_type_name3)
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop3").SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"prop1", "prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop1").SetDataTypeDocument(
+ schema_type_name2,
+ /*indexable_nested_properties_list=*/
+ {"prop2", "prop1.prop1", "prop1.prop3"}))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop2").SetDataTypeString(
+ TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder().SetName("prop4").SetDataTypeDocument(
+ schema_type_name1,
+ /*indexable_nested_properties_list=*/
+ {"prop2", "prop3"}))
+ .Build();
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_type_name1, schema_type_config1},
+ {schema_type_name2, schema_type_config2},
+ {schema_type_name3, schema_type_config3}};
+
+ // Order of iteration for Schema3:
+ // {"prop1.prop1.prop1", "prop1.prop1.prop2", "prop1.prop1.prop3",
+ // "prop1.prop2", "prop1.prop3", "prop2",
+ // "prop3.prop1", "prop3.prop2", "prop3.prop3",
+ // "prop4.prop1", "prop4.prop2", "prop4.prop3"}.
+ //
+ // Indexable properties:
+ // {"prop1.prop1.prop1", "prop1.prop1.prop3", "prop1.prop2", "prop2",
+ // "prop3.prop1", "prop3.prop3", "prop4.prop2", "prop4.prop3"}
+ //
+ // Index settings on one property do not affect properties that share the
+ // same name but are nested under different parent properties.
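+ // For example, Schema3.prop3's list {"prop1", "prop3"} applies only within
+ // prop3's subtree, making "prop3.prop1" and "prop3.prop3" indexable, while
+ // prop4's separate list {"prop2", "prop3"} independently makes "prop4.prop2"
+ // and "prop4.prop3" indexable.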
+ SchemaPropertyIterator schema3_iterator(schema_type_config3, type_config_map);
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("prop1.prop1.prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("prop1.prop1.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(),
+ Eq("prop1.prop1.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop1.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop1.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config3.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop3.prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop3.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop3.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop4.prop1"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop4.prop2"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyPath(), Eq("prop4.prop3"));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema3_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema3_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema3_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for Schema2:
+ // {"prop1.prop1", "prop1.prop2",
+ // "prop1.prop3", "prop2", "prop3"}
+ //
+ // Indexable properties:
+ // {"prop1.prop2", "prop1.prop3", "prop2", "prop3"}
+ //
+ // Indexable_nested_properties set for Schema3.prop1 does not propagate
+ // to Schema2.
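+ // For example, "prop1.prop1" is indexable when reached through Schema3.prop1
+ // (whose list names "prop1.prop1"), but is non-indexable in Schema2's own
+ // iteration below.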
+ SchemaPropertyIterator schema2_iterator(schema_type_config2, type_config_map);
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop1.prop1"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(0)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop1.prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop1.prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config1.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop2"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(1)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyPath(), Eq("prop3"));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config2.properties(2)));
+ EXPECT_THAT(schema2_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema2_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema2_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, SingleLevelCycle) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+
+ // Create schema with A -> B -> B -> B...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop2", "schemaAprop2"}, both indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop2"}, indexable.
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleLevelCycle) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+
+ // Create schema with A -> B -> C -> A -> B -> C...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2",
+ // "schemaAprop2"}, all indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1.schemaCprop1.schemaAprop2", "schemaBprop1.schemaCprop2",
+ // "schemaBprop2"}
+ //
+ // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema C:
+ // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2",
+ // "schemaCprop2"}
+ //
+ // Indexable properties: {"schemaCprop2"}
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, SingleLevelCycleWithIndexableList) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+
+ // Create schema with A -> B -> B -> B...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop1", "schemaBprop2.schemaBprop1",
+ "schemaBprop2.schemaBprop3",
+ "schemaBprop2.schemaBprop2.schemaBprop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // {"schemaAprop1.schemaBprop1" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop1" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop1" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1" (false),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaAprop1.schemaBprop2.schemaBprop3" (false),
+ // "schemaAprop1.schemaBprop3" (true),
+ // "schemaAprop2" (true)}
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // {"schemaBprop1" (true),
+ // "schemaBprop2.schemaBprop1" (true),
+ // "schemaBprop2.schemaBprop2.schemaBprop1" (true),
+ // "schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1" (false),
+ // "schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaBprop2.schemaBprop2.schemaBprop3" (true),
+ // "schemaBprop2.schemaBprop3" (false),
+ // "schemaBprop3" (true)}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop1"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop2.schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop3"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCycles) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2",
+ // "schemaAprop2", "schemaAprop3.schemaDprop2"}, all indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1.schemaCprop1.schemaAprop2",
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ // "schemaBprop1.schemaCprop2", "schemaBprop2"}
+ //
+ // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema C:
+ // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2",
+ // "schemaCprop1.schemaAprop3.schemaDprop2", "schemaCprop2"}
+ //
+ // Indexable properties: {"schemaCprop2"}
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration for schema D:
+ // {"schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ // "schemaDprop1.schemaAprop1.schemaBprop2", "schemaDprop1.schemaAprop2",
+ // "schemaDprop2"}
+ //
+ // Indexable properties: {"schemaDprop2"}
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCyclesWithIndexableList) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop2", "schemaBprop1.schemaCprop1.schemaAprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop2"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*indexable_nested_properties_list=*/
+ {"schemaDprop2", "schemaDprop1.schemaAprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ "schemaDprop1.schemaAprop3.schemaDprop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.
+ //     schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop3.schemaDprop2" (true)
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3."
+ "schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // "schemaBprop1.schemaCprop1.schemaAprop2" (false),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaBprop1.schemaCprop2" (true),
+ // "schemaBprop2" (true)
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+  // Order of iteration and whether each property is indexable for schema C:
+ // "schemaCprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaCprop1.schemaAprop2" (false),
+ // "schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaCprop2" (true)
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+  // Order of iteration and whether each property is indexable for schema D:
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaDprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaDprop1.schemaAprop2" (false),
+ // "schemaDprop2" (true)
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCyclesWithIndexableList_allIndexTrue) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
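+  //
+  // A rough sketch of the semantics these tests exercise (the expectations
+  // below are authoritative): a document property that supplies an
+  // indexable_nested_properties_list makes a nested property indexable iff
+  // the property path relative to that document property appears in the
+  // list, e.g.:
+  //
+  //   PropertyConfigBuilder()
+  //       .SetName("schemaAprop1")
+  //       .SetDataTypeDocument(schema_b, /*indexable_nested_properties_list=*/
+  //                            {"schemaBprop2"})  // only schemaBprop2
+  //
+  // whereas index_nested_properties=true/false applies a single flag to every
+  // nested property under that document property.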
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop2", "schemaBprop1.schemaCprop1.schemaAprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop2"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*indexable_nested_properties_list=*/
+ {"schemaDprop2", "schemaDprop1.schemaAprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ "schemaDprop1.schemaAprop3.schemaDprop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop3.schemaDprop2" (true)
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3."
+ "schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (false), "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"
+  // (true), "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (true),
+  // "schemaBprop1.schemaCprop2" (true),
+  // "schemaBprop2" (true)
+
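+  // The only non-indexable path for schema B is
+  // "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2":
+  // relative to schemaAprop1's indexable_nested_properties_list, the suffix
+  // "schemaBprop1.schemaCprop2" is not listed, so that leaf is not indexed.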
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1."
+ "schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema C:
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaCprop1.schemaAprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+  // "schemaCprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaCprop2" (true)
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1."
+ "schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+ // Order of iteration and whether each property is indexable for schema D:
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"
+ // (true),
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true),
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaDprop1.schemaAprop2" (true),
+ // "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"
+ // (true), "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"
+ // (true), "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaDprop2" (true)
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop1."
+ "schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1."
+ "schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest,
+ MultipleCyclesWithIndexableList_unknownPropPaths) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create the following schema:
+ // D <--> A <--- C
+ // \ ^
+ // v /
+ // B
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop2", "schemaBprop1.schemaCprop1.schemaAprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1."
+ "schemaAprop2",
+ "schemaBprop1.schemaCprop1",
+ "schemaBprop1.schemaCprop1.schemaAprop3", "schemaAprop2",
+ "schemaBprop2.schemaCprop2", "schemaBprop1.foo.bar",
+ "foo", "foo", "bar"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*indexable_nested_properties_list=*/
+ {"schemaDprop2", "schemaDprop1.schemaAprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop2",
+ "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ "schemaDprop1.schemaAprop3.schemaDprop2", "schemaBprop2",
+ "bar", "schemaDprop2.foo", "schemaDprop1",
+ "schemaAprop3.schemaDprop2"}))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2" (true),
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop1.schemaAprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"
+ // (true), "schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop2" (true),
+ // "schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2" (true),
+ // "schemaAprop3.schemaDprop2" (true)
+ //
+  // The following property paths listed in the
+  // indexable_nested_properties_list either are not defined in the schema or
+  // do not resolve to indexable leaf properties, so they are never seen
+  // during iteration. Instead, they should appear in the set returned by
+  // unknown_indexable_nested_property_paths().
+ // "schemaAprop1.bar",
+ // "schemaAprop1.foo",
+ // "schemaAprop1.schemaAprop2",
+ // "schemaAprop1.schemaBprop1.foo.bar",
+ // "schemaAprop1.schemaBprop1.schemaCprop1",
+ // "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3",
+ // "schemaAprop1.schemaBprop2.schemaCprop2",
+ // "schemaAprop3.bar",
+ // "schemaAprop3.schemaAprop3.schemaDprop2",
+ // "schemaAprop3.schemaBprop2",
+ // "schemaAprop3.schemaDprop1",
+ // "schemaAprop3.schemaDprop2.foo"
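+  //
+  // Note that the set below is deduplicated ("foo" is declared twice in
+  // schemaAprop1's list) and, as the ElementsAre expectation suggests, each
+  // unknown path is reported prefixed with the document property that
+  // declared it (e.g. "foo" -> "schemaAprop1.foo"), in sorted order.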
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3."
+ "schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(
+ schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(
+ schema_a_iterator.unknown_indexable_nested_property_paths(),
+ ElementsAre(
+ "schemaAprop1.bar", "schemaAprop1.foo", "schemaAprop1.schemaAprop2",
+ "schemaAprop1.schemaBprop1.foo.bar",
+ "schemaAprop1.schemaBprop1.schemaCprop1",
+ "schemaAprop1.schemaBprop1.schemaCprop1.schemaAprop3",
+ "schemaAprop1.schemaBprop2.schemaCprop2", "schemaAprop3.bar",
+ "schemaAprop3.schemaAprop3.schemaDprop2", "schemaAprop3.schemaBprop2",
+ "schemaAprop3.schemaDprop1", "schemaAprop3.schemaDprop2.foo"));
+
+ // Order of iteration and whether each property is indexable for schema B:
+ // "schemaBprop1.schemaCprop1.schemaAprop2" (false),
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaBprop1.schemaCprop2" (true),
+ // "schemaBprop2" (true)
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_b_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+  // Order of iteration and whether each property is indexable for schema C:
+ // "schemaCprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaCprop1.schemaAprop2" (false),
+ // "schemaCprop1.schemaAprop3.schemaDprop2" (false),
+ // "schemaCprop2" (true)
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_c_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+
+  // Order of iteration and whether each property is indexable for schema D:
+ // "schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2" (false),
+ // "schemaDprop1.schemaAprop1.schemaBprop2" (false),
+ // "schemaDprop1.schemaAprop2" (false),
+ // "schemaDprop2" (true)
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_d_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+TEST(SchemaPropertyIteratorTest, TopLevelCycleWithMultipleIndexableLists) {
+  std::string schema_a = "A";
+  std::string schema_b = "B";
+
+  // Create the following schema:
+  // A -> A (self-loop), A -> B
+  // Schema type A has a top-level document property that refers back to A
+  // itself, plus a document property of type B.
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*indexable_nested_properties_list=*/
+ {"schemaBprop1", "schemaBprop2"}))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeDocument(
+ schema_a, /*indexable_nested_properties_list=*/
+ {"schemaAprop1.schemaBprop2",
+ "schemaAprop1.schemaBprop3"}))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop3")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+  // Order of iteration and whether each property is indexable for schema A:
+ // "schemaAprop1.schemaBprop1" (true)
+ // "schemaAprop1.schemaBprop2" (true)
+ // "schemaAprop1.schemaBprop3" (false)
+ // "schemaAprop2.schemaAprop1.schemaBprop1" (false)
+ // "schemaAprop2.schemaAprop1.schemaBprop2" (true)
+ // "schemaAprop2.schemaAprop1.schemaBprop3" (true)
+ // "schemaAprop2.schemaAprop3" (false)
+ // "schemaAprop3" (true)
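+  //
+  // Because B is reached through two different indexable lists, the same leaf
+  // property can be indexable on one path and not on another:
+  // schemaAprop1's list contains "schemaBprop1" and "schemaBprop2", so
+  // "schemaAprop1.schemaBprop3" is not indexed, while schemaAprop2's list
+  // contains "schemaAprop1.schemaBprop2" and "schemaAprop1.schemaBprop3", so
+  // under that subtree schemaBprop3 is indexed and schemaBprop1 is not.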
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop1.schemaBprop1"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(0)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop1.schemaBprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop2.schemaAprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop3"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(2)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(schema_a_iterator.unknown_indexable_nested_property_paths(),
+ IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index 34ccf22..e17e388 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -15,11 +15,14 @@
#include "icing/schema/schema-store.h"
#include <algorithm>
+#include <cinttypes>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -27,15 +30,24 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/schema/backup-schema-producer.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
-#include "icing/store/key-mapper.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -47,21 +59,27 @@ namespace {
constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header";
constexpr char kSchemaFilename[] = "schema.pb";
+constexpr char kOverlaySchemaFilename[] = "overlay_schema.pb";
constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
-// A KeyMapper stores its data across 3 arrays internally. Giving each array
-// 128KiB for storage means the entire KeyMapper requires 384KiB.
+// A DynamicTrieKeyMapper stores its data across 3 arrays internally. Giving
+// each array 128KiB for storage means the entire DynamicTrieKeyMapper requires
+// 384KiB.
constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB
-const std::string MakeHeaderFilename(const std::string& base_dir) {
+std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaStoreHeaderFilename);
}
-const std::string MakeSchemaFilename(const std::string& base_dir) {
+std::string MakeSchemaFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaFilename);
}
-const std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
+std::string MakeOverlaySchemaFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kOverlaySchemaFilename);
+}
+
+std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaTypeMapperFilename);
}
@@ -102,30 +120,226 @@ std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged(
} // namespace
+/* static */ libtextclassifier3::StatusOr<SchemaStore::Header>
+SchemaStore::Header::Read(const Filesystem* filesystem,
+ const std::string& path) {
+ Header header;
+ ScopedFd sfd(filesystem->OpenForRead(path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::NotFoundError("SchemaStore header doesn't exist");
+ }
+
+ // If the file size equals sizeof(LegacyHeader), it must be a LegacyHeader.
+ int64_t file_size = filesystem->GetFileSize(sfd.get());
+ if (file_size == sizeof(LegacyHeader)) {
+ LegacyHeader legacy_header;
+ if (!filesystem->Read(path.c_str(), &legacy_header,
+ sizeof(legacy_header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't read: ", path));
+ }
+ if (legacy_header.magic != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", path));
+ }
+ header.set_checksum(legacy_header.checksum);
+ } else if (file_size == sizeof(Header)) {
+ if (!filesystem->Read(path.c_str(), &header, sizeof(header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't read: ", path));
+ }
+ if (header.magic() != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", path));
+ }
+ } else {
+ int legacy_header_size = sizeof(LegacyHeader);
+ int header_size = sizeof(Header);
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unexpected header size %" PRId64 ". Expected %d or %d", file_size,
+ legacy_header_size, header_size));
+ }
+ return header;
+}
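An illustrative sketch (not part of the patch) of the size-based format detection in Header::Read above, using plain <cstdio> I/O and hypothetical stand-in structs instead of the icing Filesystem API:

  #include <cstdint>
  #include <cstdio>

  struct LegacySketch { int32_t magic; uint32_t checksum; };  // 8 bytes
  struct HeaderSketch {                                       // 1024 bytes
    int32_t magic;
    uint32_t checksum;
    uint8_t reserved[1016];
  };

  // Reads either format, normalizing a legacy file into the current struct.
  bool ReadHeaderSketch(const char* path, HeaderSketch* out) {
    std::FILE* f = std::fopen(path, "rb");
    if (f == nullptr) return false;
    std::fseek(f, 0, SEEK_END);
    long size = std::ftell(f);
    std::rewind(f);
    bool ok = false;
    if (size == static_cast<long>(sizeof(LegacySketch))) {
      LegacySketch legacy;
      ok = std::fread(&legacy, sizeof(legacy), 1, f) == 1;
      if (ok) *out = HeaderSketch{legacy.magic, legacy.checksum, {}};
    } else if (size == static_cast<long>(sizeof(HeaderSketch))) {
      ok = std::fread(out, sizeof(*out), 1, f) == 1;
    }  // Any other size is treated as corruption.
    std::fclose(f);
    return ok;
  }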
+
+libtextclassifier3::Status SchemaStore::Header::Write(
+ const Filesystem* filesystem, const std::string& path) {
+ ScopedFd scoped_fd(filesystem->OpenForWrite(path.c_str()));
+ // This should overwrite the header.
+ if (!scoped_fd.is_valid() ||
+ !filesystem->Write(scoped_fd.get(), this, sizeof(*this)) ||
+ !filesystem->DataSync(scoped_fd.get())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to write SchemaStore header: ", path));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
- const Filesystem* filesystem, const std::string& base_dir) {
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(clock);
- std::unique_ptr<SchemaStore> schema_store =
- std::unique_ptr<SchemaStore>(new SchemaStore(filesystem, base_dir));
- ICING_RETURN_IF_ERROR(schema_store->Initialize());
+ if (!filesystem->DirectoryExists(base_dir.c_str())) {
+ return absl_ports::FailedPreconditionError(
+ "Schema store base directory does not exist!");
+ }
+ std::unique_ptr<SchemaStore> schema_store = std::unique_ptr<SchemaStore>(
+ new SchemaStore(filesystem, base_dir, clock));
+ ICING_RETURN_IF_ERROR(schema_store->Initialize(initialize_stats));
return schema_store;
}
-SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir)
- : filesystem_(*filesystem),
+libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, SchemaProto schema) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(clock);
+
+ if (!filesystem->DirectoryExists(base_dir.c_str())) {
+ return absl_ports::FailedPreconditionError(
+ "Schema store base directory does not exist!");
+ }
+ std::unique_ptr<SchemaStore> schema_store = std::unique_ptr<SchemaStore>(
+ new SchemaStore(filesystem, base_dir, clock));
+ ICING_RETURN_IF_ERROR(schema_store->Initialize(std::move(schema)));
+ return schema_store;
+}
+
+/* static */ libtextclassifier3::Status SchemaStore::DiscardOverlaySchema(
+ const Filesystem* filesystem, const std::string& base_dir, Header& header) {
+ std::string header_filename = MakeHeaderFilename(base_dir);
+ if (header.overlay_created()) {
+ header.SetOverlayInfo(
+ /*overlay_created=*/false,
+ /*min_overlay_version_compatibility=*/std::numeric_limits<
+ int32_t>::max());
+ ICING_RETURN_IF_ERROR(header.Write(filesystem, header_filename));
+ }
+ std::string schema_overlay_filename = MakeOverlaySchemaFilename(base_dir);
+ if (!filesystem->DeleteFile(schema_overlay_filename.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to delete stale schema overlay file.");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::Status SchemaStore::MigrateSchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ version_util::StateChange version_state_change, int32_t new_version) {
+ if (!filesystem->DirectoryExists(base_dir.c_str())) {
+ // Situations in which the schema store directory doesn't exist:
+ // - Initializing a new Icing instance: nothing to do now. The
+ // directory will be created later.
+ // - The schema store was lost: there is nothing we can do now. Recovery
+ // will be handled later during initialization.
+ //
+ // Either way, simply return OK here.
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir);
+ if (!filesystem->FileExists(overlay_schema_filename.c_str())) {
+ // The overlay doesn't exist, so there is nothing to migrate or clean up.
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string header_filename = MakeHeaderFilename(base_dir);
+ libtextclassifier3::StatusOr<Header> header_or;
+ switch (version_state_change) {
+ // No action is necessary for normal upgrades or when the version is
+ // unchanged. The data produced by the previous version is fully compatible
+ // with this version and there's no stale data for us to clean up.
+ // The same is true for a normal rollforward. A normal rollforward implies
+ // that the previous version was one that understood the concept of the
+ // overlay schema and would have already discarded it if it was unusable.
+ case version_util::StateChange::kVersionZeroUpgrade:
+ // fallthrough
+ case version_util::StateChange::kUpgrade:
+ // fallthrough
+ case version_util::StateChange::kRollForward:
+ // fallthrough
+ case version_util::StateChange::kCompatible:
+ return libtextclassifier3::Status::OK;
+ case version_util::StateChange::kVersionZeroRollForward:
+ // We've rolled forward. The schema overlay file, if it exists, is
+ // possibly stale. We must throw it out.
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ case version_util::StateChange::kRollBack:
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ if (header_or.ValueOrDie().min_overlay_version_compatibility() <=
+ new_version) {
+ // We've been rolled back, but the overlay schema claims that it
+ // supports this version. So we can safely return.
+ return libtextclassifier3::Status::OK;
+ }
+ // We've been rolled back to a version that the overlay schema doesn't
+ // support. We must throw it out.
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ case version_util::StateChange::kUndetermined:
+ // It's not clear what version we're on, but the base schema should always
+ // be safe to use. Throw out the overlay.
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ }
+ return libtextclassifier3::Status::OK;
+}
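A hypothetical call site for the migration hook above; fs, dir, and the version values here are assumptions standing in for the caller's context:

  ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
      &fs, dir, version_util::StateChange::kRollBack, /*new_version=*/1));

On kRollBack this reads the header and discards the overlay unless min_overlay_version_compatibility() shows the overlay still supports new_version.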
+
+/* static */ libtextclassifier3::Status SchemaStore::DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ // Schema type mapper
+ return DynamicTrieKeyMapper<SchemaTypeId>::Delete(
+ *filesystem, MakeSchemaTypeMapperFilename(base_dir));
+}
+
+SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir,
+ const Clock* clock)
+ : filesystem_(filesystem),
base_dir_(std::move(base_dir)),
- schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {}
+ clock_(clock),
+ schema_file_(std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem, MakeSchemaFilename(base_dir_))) {}
SchemaStore::~SchemaStore() {
- if (initialized_) {
+ if (has_schema_successfully_set_ && schema_file_ != nullptr &&
+ schema_type_mapper_ != nullptr && schema_type_manager_ != nullptr) {
if (!PersistToDisk().ok()) {
ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor";
}
}
}
-libtextclassifier3::Status SchemaStore::Initialize() {
+libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) {
+ ICING_RETURN_IF_ERROR(LoadSchema());
+ if (!absl_ports::IsNotFound(GetSchema().status())) {
+ return absl_ports::FailedPreconditionError(
+ "Incorrectly tried to initialize schema store with a new schema, when "
+ "one is already set!");
+ }
+ ICING_RETURN_IF_ERROR(schema_file_->Write(
+ std::make_unique<SchemaProto>(std::move(new_schema))));
+ return InitializeInternal(/*create_overlay_if_necessary=*/true,
+ /*initialize_stats=*/nullptr);
+}
+
+libtextclassifier3::Status SchemaStore::Initialize(
+ InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_IF_ERROR(LoadSchema());
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// Don't have an existing schema proto, that's fine
@@ -134,124 +348,215 @@ libtextclassifier3::Status SchemaStore::Initialize() {
// Real error when trying to read the existing schema
return schema_proto_or.status();
}
+ return InitializeInternal(/*create_overlay_if_necessary=*/false,
+ initialize_stats);
+}
+
+libtextclassifier3::Status SchemaStore::LoadSchema() {
+ libtextclassifier3::StatusOr<Header> header_or =
+ Header::Read(filesystem_, MakeHeaderFilename(base_dir_));
+ bool header_exists = false;
+ if (!header_or.ok() && !absl_ports::IsNotFound(header_or.status())) {
+ return header_or.status();
+ } else if (!header_or.ok()) {
+ header_ = std::make_unique<Header>();
+ } else {
+ header_exists = true;
+ header_ = std::make_unique<Header>(std::move(header_or).ValueOrDie());
+ }
+
+ std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir_);
+ bool overlay_schema_file_exists =
+ filesystem_->FileExists(overlay_schema_filename.c_str());
+
+ libtextclassifier3::Status base_schema_state = schema_file_->Read().status();
+ if (!base_schema_state.ok() && !absl_ports::IsNotFound(base_schema_state)) {
+ return base_schema_state;
+ }
+
+ // There are three valid cases:
+ // 1. Everything is missing. This is an empty schema store.
+ if (!base_schema_state.ok() && !overlay_schema_file_exists &&
+ !header_exists) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 2. There never was an overlay schema. The header exists, the base schema
+ // exists, and the header says the overlay schema shouldn't exist.
+ if (base_schema_state.ok() && !overlay_schema_file_exists && header_exists &&
+ !header_->overlay_created()) {
+ // Nothing else to do. Just return safely.
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. There is an overlay schema and a base schema and a header. The header
+ // says that the overlay schema should exist.
+ if (base_schema_state.ok() && overlay_schema_file_exists && header_exists &&
+ header_->overlay_created()) {
+ overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem_, MakeOverlaySchemaFilename(base_dir_));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Something has gone wrong. We've lost part of the schema ground truth.
+ // Return an error.
+ bool overlay_created = header_->overlay_created();
+ bool base_schema_exists = base_schema_state.ok();
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to properly load schema. Header {exists:%d, overlay_created:%d}, "
+ "base schema exists: %d, overlay_schema_exists: %d",
+ header_exists, overlay_created, base_schema_exists,
+ overlay_schema_file_exists));
+}
+
+libtextclassifier3::Status SchemaStore::InitializeInternal(
+ bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats) {
if (!InitializeDerivedFiles().ok()) {
ICING_VLOG(3)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for SchemaStore.";
- ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+ std::unique_ptr<Timer> regenerate_timer = clock_->GetNewTimer();
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_schema_store_recovery_cause(
+ InitializeStatsProto::IO_ERROR);
+ }
+ ICING_RETURN_IF_ERROR(RegenerateDerivedFiles(create_overlay_if_necessary));
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_schema_store_recovery_latency_ms(
+ regenerate_timer->GetElapsedMilliseconds());
+ }
}
- initialized_ = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_num_schema_types(type_config_map_.size());
+ }
+ has_schema_successfully_set_ = true;
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
- if (!HeaderExists()) {
- // Without a header, we don't know if things are consistent between each
- // other so the caller should just regenerate everything from ground truth.
- return absl_ports::InternalError("SchemaStore header doesn't exist");
- }
-
- SchemaStore::Header header;
- if (!filesystem_.Read(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
- }
-
- if (header.magic != SchemaStore::Header::kMagic) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
- }
-
ICING_ASSIGN_OR_RETURN(
schema_type_mapper_,
- KeyMapper<SchemaTypeId>::Create(filesystem_,
- MakeSchemaTypeMapperFilename(base_dir_),
- kSchemaTypeMapperMaxSize));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ *filesystem_, MakeSchemaTypeMapperFilename(base_dir_),
+ kSchemaTypeMapperMaxSize));
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- if (checksum.Get() != header.checksum) {
+ if (checksum.Get() != header_->checksum()) {
return absl_ports::InternalError(
"Combined checksum of SchemaStore was inconsistent");
}
- // Update our in-memory data structures
- type_config_map_.clear();
- ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
- for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
- // Update our type_config_map_
- type_config_map_.emplace(type_config.schema_type(), type_config);
- }
- ICING_ASSIGN_OR_RETURN(
- section_manager_,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() {
+libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
+ bool create_overlay_if_necessary) {
ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
ICING_RETURN_IF_ERROR(ResetSchemaTypeMapper());
- type_config_map_.clear();
for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
- // Update our type_config_map_
- type_config_map_.emplace(type_config.schema_type(), type_config);
-
// Assign a SchemaTypeId to the type
ICING_RETURN_IF_ERROR(schema_type_mapper_->Put(
type_config.schema_type(), schema_type_mapper_->num_keys()));
}
-
- ICING_ASSIGN_OR_RETURN(
- section_manager_,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
+
+ if (create_overlay_if_necessary) {
+ ICING_ASSIGN_OR_RETURN(
+ BackupSchemaProducer producer,
+ BackupSchemaProducer::Create(*schema_proto,
+ schema_type_manager_->section_manager()));
+
+ if (producer.is_backup_necessary()) {
+ SchemaProto base_schema = std::move(producer).Produce();
+
+ // The overlay schema should be written to the overlay file location.
+ overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem_, MakeOverlaySchemaFilename(base_dir_));
+ auto schema_ptr = std::make_unique<SchemaProto>(std::move(*schema_proto));
+ ICING_RETURN_IF_ERROR(overlay_schema_file_->Write(std::move(schema_ptr)));
+
+ // The base schema should be written to the original file
+ auto base_schema_ptr =
+ std::make_unique<SchemaProto>(std::move(base_schema));
+ ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr)));
+
+ // LINT.IfChange(min_overlay_version_compatibility)
+ // Although the current version is 3, the schema is compatible with
+ // version 1, so min_overlay_version_compatibility should be 1.
+ int32_t min_overlay_version_compatibility = version_util::kVersionOne;
+ // LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
+ header_->SetOverlayInfo(
+ /*overlay_created=*/true, min_overlay_version_compatibility);
+ // Rebuild in memory data - references to the old schema will be invalid
+ // now.
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
+ }
+ }
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
-}
-
-bool SchemaStore::HeaderExists() {
- if (!filesystem_.FileExists(MakeHeaderFilename(base_dir_).c_str())) {
- return false;
- }
-
- int64_t file_size =
- filesystem_.GetFileSize(MakeHeaderFilename(base_dir_).c_str());
-
- // If it's been truncated to size 0 before, we consider it to be a new file
- return file_size != 0 && file_size != Filesystem::kBadFileSize;
+ header_->set_checksum(checksum.Get());
+ return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
}
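For reference, the on-disk layout this produces when a backup schema is necessary (paths follow from the filename constants above; contents illustrative):

  <base_dir>/schema.pb            - backup ("base") schema readable by older code
  <base_dir>/overlay_schema.pb    - full schema for the current version
  <base_dir>/schema_store_header  - overlay_created=true,
                                    min_overlay_version_compatibility=1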
-libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) {
- // Write the header
- SchemaStore::Header header;
- header.magic = SchemaStore::Header::kMagic;
- header.checksum = checksum.Get();
+libtextclassifier3::Status SchemaStore::BuildInMemoryCache() {
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::InheritanceMap inheritance_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(*schema_proto));
- // This should overwrite the header.
- if (!filesystem_.Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_)));
+ reverse_schema_type_mapper_.clear();
+ type_config_map_.clear();
+ schema_subtype_id_map_.clear();
+ for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
+ std::string_view type_name = type_config.schema_type();
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId type_id,
+ schema_type_mapper_->Get(type_name));
+
+ // Build reverse_schema_type_mapper_
+ reverse_schema_type_mapper_.insert({type_id, std::string(type_name)});
+
+ // Build type_config_map_
+ type_config_map_.insert({std::string(type_name), type_config});
+
+ // Build schema_subtype_id_map_
+ std::unordered_set<SchemaTypeId>& subtype_id_set =
+ schema_subtype_id_map_[type_id];
+ // Find all child types
+ auto child_types_names = inheritance_map.find(type_name);
+ if (child_types_names != inheritance_map.end()) {
+ subtype_id_set.reserve(child_types_names->second.size() + 1);
+ for (const auto& [child_type_name, is_direct_child] :
+ child_types_names->second) {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId child_type_id,
+ schema_type_mapper_->Get(child_type_name));
+ subtype_id_set.insert(child_type_id);
+ }
+ }
+ // Every type is a subtype of itself.
+ subtype_id_set.insert(type_id);
}
+
+ // Build schema_type_manager_
+ ICING_ASSIGN_OR_RETURN(
+ schema_type_manager_,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
return libtextclassifier3::Status::OK;
}
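A concrete trace of the subtype bookkeeping above, assuming a hypothetical schema in which Artist extends Person and ids were assigned Person=0, Artist=1:

  inheritance_map["Person"] contains {"Artist", is_direct_child=true}
  schema_subtype_id_map_[0] == {0, 1}  // Person: itself plus Artist
  schema_subtype_id_map_[1] == {1}     // Artist: itself only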
libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
schema_type_mapper_.reset();
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = KeyMapper<SchemaTypeId>::Delete(
- filesystem_, MakeSchemaTypeMapperFilename(base_dir_));
+ libtextclassifier3::Status status =
+ DynamicTrieKeyMapper<SchemaTypeId>::Delete(
+ *filesystem_, MakeSchemaTypeMapperFilename(base_dir_));
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete old schema_type mapper";
@@ -259,33 +564,40 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
}
ICING_ASSIGN_OR_RETURN(
schema_type_mapper_,
- KeyMapper<SchemaTypeId>::Create(filesystem_,
- MakeSchemaTypeMapperFilename(base_dir_),
- kSchemaTypeMapperMaxSize));
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ *filesystem_, MakeSchemaTypeMapperFilename(base_dir_),
+ kSchemaTypeMapperMaxSize));
return libtextclassifier3::Status::OK;
}
libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
- Crc32 total_checksum;
-
- auto schema_proto_or = GetSchema();
+ // Base schema checksum
+ auto schema_proto_or = schema_file_->Read();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
- // Nothing to checksum
- return total_checksum;
- } else if (!schema_proto_or.ok()) {
- // Some real error. Pass it up
- return schema_proto_or.status();
+ return Crc32();
}
-
- // Guaranteed to have a schema proto now
- const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, schema_proto_or);
Crc32 schema_checksum;
schema_checksum.Append(schema_proto->SerializeAsString());
- Crc32 schema_type_mapper_checksum = schema_type_mapper_->ComputeChecksum();
+ Crc32 overlay_schema_checksum;
+ if (overlay_schema_file_ != nullptr) {
+ // Read the overlay (not the base) schema for the overlay checksum.
+ auto schema_proto_or = overlay_schema_file_->Read();
+ if (schema_proto_or.ok()) {
+ ICING_ASSIGN_OR_RETURN(schema_proto, schema_proto_or);
+ overlay_schema_checksum.Append(schema_proto->SerializeAsString());
+ }
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 schema_type_mapper_checksum,
+ schema_type_mapper_->ComputeChecksum());
+ Crc32 total_checksum;
total_checksum.Append(std::to_string(schema_checksum.Get()));
+ if (overlay_schema_file_ != nullptr) {
+ total_checksum.Append(std::to_string(overlay_schema_checksum.Get()));
+ }
total_checksum.Append(std::to_string(schema_type_mapper_checksum.Get()));
return total_checksum;
@@ -293,7 +605,10 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
const {
- return schema_file_.Read();
+ if (overlay_schema_file_ != nullptr) {
+ return overlay_schema_file_->Read();
+ }
+ return schema_file_->Read();
}
// TODO(cassiewang): Consider removing this definition of SetSchema if it's not
@@ -302,19 +617,29 @@ libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
// SetSchema(SchemaProto&& new_schema)
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(const SchemaProto& new_schema,
- bool ignore_errors_and_delete_documents) {
- return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions) {
+ return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents,
+ allow_circular_schema_definitions);
}
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(SchemaProto&& new_schema,
- bool ignore_errors_and_delete_documents) {
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions) {
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::DependentMap new_dependent_map,
+ SchemaUtil::Validate(new_schema, allow_circular_schema_definitions));
+
SetSchemaResult result;
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// We don't have a pre-existing schema, so anything is valid.
result.success = true;
+ for (const SchemaTypeConfigProto& type_config : new_schema.types()) {
+ result.schema_types_new_by_name.insert(type_config.schema_type());
+ }
} else if (!schema_proto_or.ok()) {
// Real error
return schema_proto_or.status();
@@ -332,10 +657,16 @@ SchemaStore::SetSchema(SchemaProto&& new_schema,
// Different schema, track the differences and see if we can still write it
SchemaUtil::SchemaDelta schema_delta =
- SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema);
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ new_dependent_map);
- // An incompatible index is fine, we can just reindex
- result.index_incompatible = schema_delta.index_incompatible;
+ result.schema_types_new_by_name = std::move(schema_delta.schema_types_new);
+ result.schema_types_changed_fully_compatible_by_name =
+ std::move(schema_delta.schema_types_changed_fully_compatible);
+ result.schema_types_index_incompatible_by_name =
+ std::move(schema_delta.schema_types_index_incompatible);
+ result.schema_types_join_incompatible_by_name =
+ std::move(schema_delta.schema_types_join_incompatible);
for (const auto& schema_type : schema_delta.schema_types_deleted) {
// We currently don't support deletions, so mark this as not possible.
@@ -370,26 +701,70 @@ SchemaStore::SetSchema(SchemaProto&& new_schema,
result.success = result.success || ignore_errors_and_delete_documents;
if (result.success) {
- // Write the schema (and potentially overwrite a previous schema)
- ICING_RETURN_IF_ERROR(
- schema_file_.Write(std::make_unique<SchemaProto>(new_schema)));
-
- ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+ ICING_RETURN_IF_ERROR(ApplySchemaChange(std::move(new_schema)));
+ has_schema_successfully_set_ = true;
}
return result;
}
-libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
-SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const {
- auto schema_proto_or = GetSchema();
- if (absl_ports::IsNotFound(schema_proto_or.status())) {
- return absl_ports::FailedPreconditionError("Schema not set yet.");
- } else if (!schema_proto_or.ok()) {
- // Some other real error, pass it up
- return schema_proto_or.status();
+libtextclassifier3::Status SchemaStore::ApplySchemaChange(
+ SchemaProto new_schema) {
+ // We need to ensure that we either 1) successfully set the schema and
+ // update all derived data structures or 2) fail and leave the schema store
+ // unchanged.
+ // So, first, we create an empty temporary directory to build a new schema
+ // store in.
+ std::string temp_schema_store_dir_path = base_dir_ + "_temp";
+ if (!filesystem_->DeleteDirectoryRecursively(
+ temp_schema_store_dir_path.c_str())) {
+ ICING_LOG(ERROR) << "Recursively deleting "
+ << temp_schema_store_dir_path.c_str();
+ return absl_ports::InternalError(
+ "Unable to delete temp directory to prepare to build new schema "
+ "store.");
+ }
+
+ DestructibleDirectory temp_schema_store_dir(
+ filesystem_, std::move(temp_schema_store_dir_path));
+ if (!temp_schema_store_dir.is_valid()) {
+ return absl_ports::InternalError(
+ "Unable to create temp directory to build new schema store.");
+ }
+
+ // Then we create our new schema store with the new schema.
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<SchemaStore> new_schema_store,
+ SchemaStore::Create(filesystem_, temp_schema_store_dir.dir(), clock_,
+ std::move(new_schema)));
+
+ // Then we swap the new schema file + new derived files with the old files.
+ if (!filesystem_->SwapFiles(base_dir_.c_str(),
+ temp_schema_store_dir.dir().c_str())) {
+ return absl_ports::InternalError(
+ "Unable to apply new schema due to failed swap!");
+ }
+
+ std::string old_base_dir = std::move(base_dir_);
+ *this = std::move(*new_schema_store);
+
+ // After the std::move, the filepaths saved in this instance and in the
+ // schema_file_ instance will still be the ones from temp_schema_store_dir
+ // even though they now point to files that are within old_base_dir.
+ // Manually set them to the correct paths.
+ base_dir_ = std::move(old_base_dir);
+ schema_file_->SetSwappedFilepath(MakeSchemaFilename(base_dir_));
+ if (overlay_schema_file_ != nullptr) {
+ overlay_schema_file_->SetSwappedFilepath(
+ MakeOverlaySchemaFilename(base_dir_));
}
+ return libtextclassifier3::Status::OK;
+}
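The pattern above, reduced to its skeleton (names illustrative; error handling elided). The key property is that nothing observable changes unless SwapFiles succeeds:

  std::string temp_dir = base_dir_ + "_temp";
  filesystem_->DeleteDirectoryRecursively(temp_dir.c_str());  // start clean
  DestructibleDirectory scoped_temp(filesystem_, temp_dir);   // auto-cleanup
  // ... build the complete new store under temp_dir ...
  filesystem_->SwapFiles(base_dir_.c_str(), temp_dir.c_str());
  // base_dir_ now holds the new state; the old state sits in temp_dir and is
  // deleted when scoped_temp goes out of scope.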
+
+libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
+SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
const auto& type_config_iter =
type_config_map_.find(std::string(schema_type));
if (type_config_iter == type_config_map_.end()) {
@@ -401,44 +776,211 @@ SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const {
libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
std::string_view schema_type) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
return schema_type_mapper_->Get(schema_type);
}
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SchemaStore::GetSectionContent(const DocumentProto& document,
- std::string_view section_path) const {
- return section_manager_->GetSectionContent(document, section_path);
+libtextclassifier3::StatusOr<const std::string*> SchemaStore::GetSchemaType(
+ SchemaTypeId schema_type_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ if (const auto it = reverse_schema_type_mapper_.find(schema_type_id);
+ it == reverse_schema_type_mapper_.end()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ } else {
+ return &it->second;
+ }
}
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SchemaStore::GetSectionContent(const DocumentProto& document,
- SectionId section_id) const {
- return section_manager_->GetSectionContent(document, section_id);
+libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ GetSchemaTypeId(schema_type));
+ auto iter = schema_subtype_id_map_.find(schema_type_id);
+ if (iter == schema_subtype_id_map_.end()) {
+ // This should never happen, unless there is an inconsistency or IO error.
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Schema type '", schema_type, "' is not found in the subtype map."));
+ }
+ return &iter->second;
}
libtextclassifier3::StatusOr<const SectionMetadata*>
SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
- return section_manager_->GetSectionMetadata(schema_type_id, section_id);
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ return schema_type_manager_->section_manager().GetSectionMetadata(
+ schema_type_id, section_id);
}
-libtextclassifier3::StatusOr<std::vector<Section>> SchemaStore::ExtractSections(
+libtextclassifier3::StatusOr<SectionGroup> SchemaStore::ExtractSections(
const DocumentProto& document) const {
- return section_manager_->ExtractSections(document);
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ return schema_type_manager_->section_manager().ExtractSections(document);
+}
+
+libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+SchemaStore::GetJoinablePropertyMetadata(
+ SchemaTypeId schema_type_id, const std::string& property_path) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ return schema_type_manager_->joinable_property_manager()
+ .GetJoinablePropertyMetadata(schema_type_id, property_path);
+}
+
+libtextclassifier3::StatusOr<JoinablePropertyGroup>
+SchemaStore::ExtractJoinableProperties(const DocumentProto& document) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ return schema_type_manager_->joinable_property_manager()
+ .ExtractJoinableProperties(document);
}
libtextclassifier3::Status SchemaStore::PersistToDisk() {
- if (schema_type_mapper_ != nullptr) {
- // It's possible we haven't had a schema set yet, so SchemaTypeMapper hasn't
- // been initialized and is still a nullptr
- ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
+ if (!has_schema_successfully_set_) {
+ return libtextclassifier3::Status::OK;
}
-
+ ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+ header_->set_checksum(checksum.Get());
+ return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
+}
- return libtextclassifier3::Status::OK;
+SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
+ SchemaStoreStorageInfoProto storage_info;
+ int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str());
+ storage_info.set_schema_store_size(
+ Filesystem::SanitizeFileSize(directory_size));
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema(), storage_info);
+ storage_info.set_num_schema_types(schema->types_size());
+ int total_sections = 0;
+ int num_types_sections_exhausted = 0;
+ for (const SchemaTypeConfigProto& type : schema->types()) {
+ auto sections_list_or =
+ schema_type_manager_->section_manager().GetMetadataList(
+ type.schema_type());
+ if (!sections_list_or.ok()) {
+ continue;
+ }
+ total_sections += sections_list_or.ValueOrDie()->size();
+ if (sections_list_or.ValueOrDie()->size() == kTotalNumSections) {
+ ++num_types_sections_exhausted;
+ }
+ }
+
+ storage_info.set_num_total_sections(total_sections);
+ storage_info.set_num_schema_types_sections_exhausted(
+ num_types_sections_exhausted);
+ return storage_info;
+}
+
+libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+SchemaStore::GetSectionMetadata(const std::string& schema_type) const {
+ return schema_type_manager_->section_manager().GetMetadataList(schema_type);
+}
+
+bool SchemaStore::IsPropertyDefinedInSchema(
+ SchemaTypeId schema_type_id, const std::string& property_path) const {
+ auto schema_name_itr = reverse_schema_type_mapper_.find(schema_type_id);
+ if (schema_name_itr == reverse_schema_type_mapper_.end()) {
+ return false;
+ }
+ const std::string* current_type_name = &schema_name_itr->second;
+
+ std::vector<std::string_view> property_path_parts =
+ property_util::SplitPropertyPathExpr(property_path);
+ for (int i = 0; i < property_path_parts.size(); ++i) {
+ auto type_config_itr = type_config_map_.find(*current_type_name);
+ if (type_config_itr == type_config_map_.end()) {
+ return false;
+ }
+ std::string_view property_name = property_path_parts.at(i);
+ const PropertyConfigProto* selected_property = nullptr;
+ for (const PropertyConfigProto& property :
+ type_config_itr->second.properties()) {
+ if (property.property_name() == property_name) {
+ selected_property = &property;
+ break;
+ }
+ }
+ if (selected_property == nullptr) {
+ return false;
+ }
+ if (i == property_path_parts.size() - 1) {
+ // We've found a property at the final part of the path.
+ return true;
+ }
+ if (selected_property->data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // This isn't the final part of the path, but the property isn't a
+ // document, so we know that this path doesn't exist.
+ return false;
+ }
+ current_type_name = &selected_property->schema_type();
+ }
+
+ // We should never reach this point.
+ return false;
+}
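Hypothetical usage of the path walk above, assuming a type "Email" (id 0) whose "sender" property is a DOCUMENT of type "Person", and "Person" has a string property "name":

  store->IsPropertyDefinedInSchema(0, "sender.name");    // true
  store->IsPropertyDefinedInSchema(0, "sender.phone");   // false: no such leaf
  store->IsPropertyDefinedInSchema(0, "sender.name.x");  // false: "name" is
                                                         // not a document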
+
+libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
+ const {
+ SchemaDebugInfoProto debug_info;
+ if (has_schema_successfully_set_) {
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema());
+ *debug_info.mutable_schema() = *schema;
+ }
+ ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum());
+ debug_info.set_crc(crc.Get());
+ return debug_info;
+}
+
+std::vector<SchemaStore::ExpandedTypePropertyMask>
+SchemaStore::ExpandTypePropertyMasks(
+ const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+ const {
+ std::unordered_map<SchemaTypeId, ExpandedTypePropertyMask> result_map;
+ for (const TypePropertyMask& type_field_mask : type_property_masks) {
+ if (type_field_mask.schema_type() == kSchemaTypeWildcard) {
+ ExpandedTypePropertyMask entry{type_field_mask.schema_type(),
+ /*paths=*/{}};
+ entry.paths.insert(type_field_mask.paths().begin(),
+ type_field_mask.paths().end());
+ result_map.insert({kInvalidSchemaTypeId, std::move(entry)});
+ } else {
+ auto schema_type_ids_or =
+ GetSchemaTypeIdsWithChildren(type_field_mask.schema_type());
+ // If we can't find the SchemaTypeIds, just throw it away
+ if (!schema_type_ids_or.ok()) {
+ continue;
+ }
+ const std::unordered_set<SchemaTypeId>* schema_type_ids =
+ schema_type_ids_or.ValueOrDie();
+ for (SchemaTypeId schema_type_id : *schema_type_ids) {
+ auto schema_type_name_iter =
+ reverse_schema_type_mapper_.find(schema_type_id);
+ if (schema_type_name_iter == reverse_schema_type_mapper_.end()) {
+ // This should never happen, unless there is an inconsistency or IO
+ // error.
+ ICING_LOG(ERROR) << "Got unknown schema type id: " << schema_type_id;
+ continue;
+ }
+
+ auto iter = result_map.find(schema_type_id);
+ if (iter == result_map.end()) {
+ ExpandedTypePropertyMask entry{schema_type_name_iter->second,
+ /*paths=*/{}};
+ iter = result_map.insert({schema_type_id, std::move(entry)}).first;
+ }
+ iter->second.paths.insert(type_field_mask.paths().begin(),
+ type_field_mask.paths().end());
+ }
+ }
+ }
+ std::vector<ExpandedTypePropertyMask> result;
+ result.reserve(result_map.size());
+ for (auto& entry : result_map) {
+ result.push_back(std::move(entry.second));
+ }
+ return result;
}
} // namespace lib
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index f5c6588..88968b1 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -16,23 +16,34 @@
#define ICING_SCHEMA_SCHEMA_STORE_H_
#include <cstdint>
+#include <cstring>
+#include <limits>
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
-#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
+#include "icing/util/clock.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -45,9 +56,7 @@ namespace lib {
// should always call Get* from the SchemaStore.
class SchemaStore {
public:
- struct Header {
- static constexpr int32_t kMagic = 0x72650d0a;
-
+ struct LegacyHeader {
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
@@ -55,6 +64,69 @@ class SchemaStore {
uint32_t checksum;
};
+ class Header {
+ public:
+ static constexpr int32_t kMagic = 0x72650d0a;
+
+ explicit Header()
+ : magic_(kMagic),
+ checksum_(0),
+ overlay_created_(false),
+ min_overlay_version_compatibility_(
+ std::numeric_limits<int32_t>::max()) {
+ memset(overlay_created_padding_, 0, kOverlayCreatedPaddingSize);
+ memset(padding_, 0, kPaddingSize);
+ }
+
+ // RETURNS:
+ // - On success, a valid Header instance
+ // - NOT_FOUND if header file doesn't exist
+ // - INTERNAL if unable to read header
+ static libtextclassifier3::StatusOr<Header> Read(
+ const Filesystem* filesystem, const std::string& path);
+
+ libtextclassifier3::Status Write(const Filesystem* filesystem,
+ const std::string& path);
+
+ int32_t magic() const { return magic_; }
+
+ uint32_t checksum() const { return checksum_; }
+ void set_checksum(uint32_t checksum) { checksum_ = checksum; }
+
+ bool overlay_created() const { return overlay_created_; }
+
+ int32_t min_overlay_version_compatibility() const {
+ return min_overlay_version_compatibility_;
+ }
+
+ void SetOverlayInfo(bool overlay_created,
+ int32_t min_overlay_version_compatibility) {
+ overlay_created_ = overlay_created;
+ min_overlay_version_compatibility_ = min_overlay_version_compatibility;
+ }
+
+ private:
+ // Holds the magic as a quick sanity check against file corruption.
+ int32_t magic_;
+
+ // Checksum of the SchemaStore's sub-component's checksums.
+ uint32_t checksum_;
+
+ bool overlay_created_;
+ // Three bytes of padding: min_overlay_version_compatibility_ has
+ // alignof() == 4 and overlay_created_padding_ begins at offset 9.
+ static constexpr int kOverlayCreatedPaddingSize = 3;
+ uint8_t overlay_created_padding_[kOverlayCreatedPaddingSize];
+
+ int32_t min_overlay_version_compatibility_;
+
+ static constexpr int kPaddingSize = 1008;
+ // Padding exists just to reserve space for additional values.
+ uint8_t padding_[kPaddingSize];
+ };
+ static_assert(sizeof(Header) == 1024);
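The arithmetic behind the static_assert, spelled out from the member order above (verifiable with offsetof): magic_ at offset 0 (4 bytes), checksum_ at 4 (4), overlay_created_ at 8 (1), overlay_created_padding_ at 9-11 (3), min_overlay_version_compatibility_ at 12 (4), padding_ at 16 (1008); 16 + 1008 == 1024.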
+
// Holds information on what may have been affected by the new schema. This is
// generally data that other classes may depend on from the SchemaStore,
// so that we can know if we should go update those classes as well.
@@ -65,9 +137,6 @@ class SchemaStore {
// to file.
bool success = false;
- // Whether the new schema changes invalidate the index.
- bool index_incompatible = false;
-
// SchemaTypeIds of schema types can be reassigned new SchemaTypeIds if:
// 1. Schema types are added in the middle of the SchemaProto
// 2. Schema types are removed from the middle of the SchemaProto
@@ -97,30 +166,78 @@ class SchemaStore {
// SchemaUtil::ComputeCompatibilityDelta. Represented by the SchemaTypeId
// assigned to this SchemaTypeConfigProto in the *old* schema.
std::unordered_set<SchemaTypeId> schema_types_incompatible_by_id;
+
+ // Schema types that were added in the new schema. Represented by the
+ // `schema_type` field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_new_by_name;
+
+ // Schema types that were changed in a way that was backwards compatible and
+ // didn't invalidate the index. Represented by the `schema_type` field in
+ // the SchemaTypeConfigProto.
+ std::unordered_set<std::string>
+ schema_types_changed_fully_compatible_by_name;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the index. Represented by the `schema_type` field in the
+ // SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_index_incompatible_by_name;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the joinable cache. Represented by the `schema_type`
+ // field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_join_incompatible_by_name;
};
+ struct ExpandedTypePropertyMask {
+ std::string schema_type;
+ std::unordered_set<std::string> paths;
+ };
+
+ static constexpr std::string_view kSchemaTypeWildcard = "*";
+
// Factory function to create a SchemaStore which does not take ownership
// of any input components, and all pointers must refer to valid objects that
// outlive the created SchemaStore instance. The base_dir must already exist.
// There does not need to be an existing schema already.
//
+ // If initialize_stats is present, the fields related to SchemaStore will be
+ // populated.
+ //
// Returns:
// A SchemaStore on success
// FAILED_PRECONDITION on any null pointer input
// INTERNAL_ERROR on any IO errors
static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, InitializeStatsProto* initialize_stats = nullptr);
+
+ // Migrates schema files (backup vs. new schema) according to the version
+ // state change.
+ //
+ // Returns:
+ // OK on success or nothing to migrate
+ static libtextclassifier3::Status MigrateSchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ version_util::StateChange version_state_change, int32_t new_version);
+
+ // Discards all derived data in the schema store.
+ //
+ // Returns:
+ // OK on success or nothing to discard
+ // INTERNAL_ERROR on any I/O errors
+ static libtextclassifier3::Status DiscardDerivedFiles(
const Filesystem* filesystem, const std::string& base_dir);
- // Not copyable
+ SchemaStore(SchemaStore&&) = default;
+ SchemaStore& operator=(SchemaStore&&) = default;
+
SchemaStore(const SchemaStore&) = delete;
SchemaStore& operator=(const SchemaStore&) = delete;
// Persists and updates checksum of subcomponents.
~SchemaStore();
- // Retrieve the current schema if it exists. Caller does not get ownership of
- // the schema proto and modifying the returned pointer does not affect the
- // underlying schema proto.
+ // Retrieve the current schema if it exists.
//
// Returns:
// SchemaProto* if exists
@@ -142,10 +259,12 @@ class SchemaStore {
// INTERNAL_ERROR on any IO errors
libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
const SchemaProto& new_schema,
- bool ignore_errors_and_delete_documents = false);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions);
libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
SchemaProto&& new_schema,
- bool ignore_errors_and_delete_documents = false);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions);
// Get the SchemaTypeConfigProto of schema_type name.
//
@@ -157,52 +276,94 @@ class SchemaStore {
libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
GetSchemaTypeConfig(std::string_view schema_type) const;
+ // Returns the schema type of the passed in SchemaTypeId
+ //
+ // Returns:
+ // schema type on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id is invalid
+ libtextclassifier3::StatusOr<const std::string*> GetSchemaType(
+ SchemaTypeId schema_type_id) const;
+
// Returns the SchemaTypeId of the passed in schema type
//
// Returns:
// SchemaTypeId on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND_ERROR if we don't know about the schema type
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
std::string_view schema_type) const;
- // Finds content of a section by section path (e.g. property1.property2)
+ // Similar to GetSchemaTypeId but will return a set of SchemaTypeId to also
+ // include child types.
//
// Returns:
- // A string of content on success
- // NOT_FOUND if:
- // 1. Property is optional and not found in the document
- // 2. section_path is invalid
- // 3. Content is empty
- libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
- const DocumentProto& document, std::string_view section_path) const;
-
- // Finds content of a section by id
- //
- // Returns:
- // A string of content on success
- // INVALID_ARGUMENT if section id is invalid
- // NOT_FOUND if type config name of document not found
- libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
- const DocumentProto& document, SectionId section_id) const;
+ // A set of SchemaTypeId on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // NOT_FOUND_ERROR if we don't know about the schema type
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ GetSchemaTypeIdsWithChildren(std::string_view schema_type) const;
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
// Returns:
- // pointer to SectionMetadata on success
- // INVALID_ARGUMENT if schema type id or section is invalid
+ // Valid pointer to SectionMetadata on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id or section id is invalid
libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
SchemaTypeId schema_type_id, SectionId section_id) const;
- // Extracts all sections from the given document, sections are sorted by
- // section id in increasing order. Section ids start from 0. Sections with
- // empty content won't be returned.
+ // Returns true if a property is defined in the given schema type,
+ // regardless of whether it is indexed.
+ bool IsPropertyDefinedInSchema(SchemaTypeId schema_type_id,
+ const std::string& property) const;
+
+ // Extracts all sections of different types from the given document and
+ // groups them by type.
+ // - Each Section vector is sorted by section Id in ascending order. The
+ // sorted section ids may not be continuous, since not all sections are
+ // present in the document.
+ // - Sections with empty content won't be returned.
+ // - For example, we may extract:
+ // string_sections: [2, 7, 10]
+ // integer_sections: [3, 5, 8]
//
// Returns:
- // A list of sections on success
+ // A SectionGroup instance on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if type config name of document not found
- libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
+ libtextclassifier3::StatusOr<SectionGroup> ExtractSections(
+ const DocumentProto& document) const;
+
+ // Returns the JoinablePropertyMetadata associated with property_path that's
+ // in the SchemaTypeId.
+ //
+ // Returns:
+ // Valid pointer to JoinablePropertyMetadata on success
+ // nullptr if property_path doesn't exist (or is not joinable) in the
+ // joinable metadata list of the schema
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id is invalid
+ libtextclassifier3::StatusOr<const JoinablePropertyMetadata*>
+ GetJoinablePropertyMetadata(SchemaTypeId schema_type_id,
+ const std::string& property_path) const;
+
+ // Extracts all joinable property contents of different types from the given
+ // document and groups them by joinable value type.
+ // - Joinable properties are sorted by joinable property id in ascending
+ // order. The sorted joinable property ids may not be continuous, since not
+ // all joinable properties are present in the document.
+ // - Joinable property ids start from 0.
+ // - Joinable properties with empty content won't be returned.
+ //
+ // Returns:
+ // A JoinablePropertyGroup instance on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // NOT_FOUND if the document's type config name is not found
+ libtextclassifier3::StatusOr<JoinablePropertyGroup> ExtractJoinableProperties(
const DocumentProto& document) const;
// Syncs all the data changes to disk.
@@ -220,16 +381,95 @@ class SchemaStore {
// INTERNAL_ERROR on compute error
libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const;
+ // Returns:
+ // - On success, the section metadata list for the specified schema type
+ // - NOT_FOUND if the schema type is not present in the schema
+ libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+ GetSectionMetadata(const std::string& schema_type) const;
+
+ // Calculates the StorageInfo for the Schema Store.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ SchemaStoreStorageInfoProto GetStorageInfo() const;
+
+ // Get debug information for the schema store.
+ //
+ // Returns:
+ // SchemaDebugInfoProto on success
+ // INTERNAL_ERROR on IO errors, crc compute error
+ libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const;
+
+ // Expands the provided type_property_masks into a vector of
+ // ExpandedTypePropertyMasks to account for polymorphism. If both a parent
+ // type and one of its child types appear in the masks, the parent type's
+ // paths will be merged into the child's.
+ //
+ // For example, assume that we have two schema types A and B, and we have
+ // - A is the parent type of B
+ // - Paths of A: {P1, P2}
+ // - Paths of B: {P3}
+ //
+ // Then, we will have the following in the result.
+ // - Expanded paths of A: {P1, P2}
+ // - Expanded paths of B: {P1, P2, P3}
+ std::vector<ExpandedTypePropertyMask> ExpandTypePropertyMasks(
+ const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+ const;
+
private:
+ // Factory function to create a SchemaStore and set its schema. The created
+ // instance does not take ownership of any input components and all pointers
+ // must refer to valid objects that outlive the created SchemaStore instance.
+ // The base_dir must already exist. No schema may have been set in base_dir prior
+ // to this.
+ //
+ // Returns:
+ // A SchemaStore on success
+ // FAILED_PRECONDITION on any null pointer input or if there has already
+ // been a schema set for this path.
+ // INTERNAL_ERROR on any IO errors
+ static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, SchemaProto schema);
+
// Use SchemaStore::Create instead.
- explicit SchemaStore(const Filesystem* filesystem, std::string base_dir);
+ explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
+ const Clock* clock);
+
+ // Deletes the overlay schema and ensures that the Header is correctly set.
+ //
+ // RETURNS:
+ // OK on success
+ // INTERNAL_ERROR on any IO errors
+ static libtextclassifier3::Status DiscardOverlaySchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ Header& header);
+
+ // Verifies that there is no error retrieving a previously set schema. Then
+ // initializes like normal.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status Initialize(InitializeStatsProto* initialize_stats);
+
+ // First, blindly writes new_schema to the schema_file. Then initializes like
+ // normal.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error
+ // FAILED_PRECONDITION if there is already a schema set for the schema_file.
+ libtextclassifier3::Status Initialize(SchemaProto new_schema);
// Handles initializing the SchemaStore and regenerating any data if needed.
//
// Returns:
// OK on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status Initialize();
+ libtextclassifier3::Status InitializeInternal(
+ bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -244,11 +484,16 @@ class SchemaStore {
// OK on success
// NOT_FOUND_ERROR if a schema proto has not been set
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RegenerateDerivedFiles();
+ libtextclassifier3::Status RegenerateDerivedFiles(
+ bool create_overlay_if_necessary);
- // Checks if the header exists already. This does not create the header file
- // if it doesn't exist.
- bool HeaderExists();
+ // Build type_config_map_, schema_subtype_id_map_, and schema_type_manager_.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND_ERROR if a schema proto has not been set
+ // INTERNAL_ERROR on any IO errors
+ libtextclassifier3::Status BuildInMemoryCache();
// Update and replace the header file. Creates the header file if it doesn't
// exist.
@@ -265,29 +510,74 @@ class SchemaStore {
// Returns any IO errors.
libtextclassifier3::Status ResetSchemaTypeMapper();
- const Filesystem& filesystem_;
- const std::string base_dir_;
-
- // Used internally to indicate whether the class has been initialized. This is
- // to guard against cases where the object has been created, but Initialize
- // fails in the constructor. If we have successfully exited the constructor,
- // then this field can be ignored. Clients of SchemaStore should not need to
- // worry about this field.
- bool initialized_ = false;
+  // Creates a new schema store with new_schema and then swaps that new schema
+  // store with the existing one. This function guarantees that either this
+  // instance will be fully updated to the new schema or no changes will take
+  // effect.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL on I/O error.
+ libtextclassifier3::Status ApplySchemaChange(SchemaProto new_schema);
+
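+  // An illustrative sketch (editor's note, not part of this change) of the
+  // create-then-swap idiom that the guarantee above describes. The scratch
+  // directory name and the exact error handling are assumptions:
+  //
+  //   libtextclassifier3::Status ApplySchemaChange(SchemaProto new_schema) {
+  //     // Build a complete, fully-initialized store in a scratch location.
+  //     ICING_ASSIGN_OR_RETURN(
+  //         std::unique_ptr<SchemaStore> new_store,
+  //         Create(filesystem_, scratch_dir, clock_, std::move(new_schema)));
+  //     // Nothing observable changes until this single move-assignment.
+  //     *this = std::move(*new_store);
+  //     return libtextclassifier3::Status::OK;
+  //   }
+  //
+  // Any failure before the final move leaves this instance untouched, which
+  // is what makes the change all-or-nothing.
+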
+ libtextclassifier3::Status CheckSchemaSet() const {
+ return has_schema_successfully_set_
+ ? libtextclassifier3::Status::OK
+ : absl_ports::FailedPreconditionError("Schema not set yet.");
+ }
+
+  // Correctly loads the Header, schema_file_ and (if present) the
+  // overlay_schema_file_.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL if an IO error is encountered when reading the Header or
+  //     schemas, or if an invalid schema configuration is present.
+ libtextclassifier3::Status LoadSchema();
+
+ const Filesystem* filesystem_;
+ std::string base_dir_;
+ const Clock* clock_;
+
+ // Used internally to indicate whether the class has been successfully
+ // initialized with a valid schema. Will be false if Initialize failed or no
+ // schema has ever been set.
+ bool has_schema_successfully_set_ = false;
// Cached schema
- FileBackedProto<SchemaProto> schema_file_;
+ std::unique_ptr<FileBackedProto<SchemaProto>> schema_file_;
+
+ // This schema holds the definition of any schema types that are not
+ // compatible with older versions of Icing code.
+ std::unique_ptr<FileBackedProto<SchemaProto>> overlay_schema_file_;
+
+ // Maps schema types to a densely-assigned unique id.
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+  // Maps schema type ids to the corresponding schema type. This is the inverse
+  // map of schema_type_mapper_.
+ std::unordered_map<SchemaTypeId, std::string> reverse_schema_type_mapper_;
// A hash map of (type config name -> type config), allows faster lookup of
// type config in schema. The O(1) type config access makes schema-related and
// section-related operations faster.
SchemaUtil::TypeConfigMap type_config_map_;
- // Maps schema types to a densely-assigned unique id.
- std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
-
- // Manager of indexed section related metadata.
- std::unique_ptr<const SectionManager> section_manager_;
+ // Maps from each type id to all of its subtype ids.
+ // T2 is a subtype of T1, if and only if one of the following conditions is
+ // met:
+ // - T2 is T1
+ // - T2 extends T1
+ // - There exists a type U, such that T2 is a subtype of U, and U is a subtype
+ // of T1
+ std::unordered_map<SchemaTypeId, std::unordered_set<SchemaTypeId>>
+ schema_subtype_id_map_;
+
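+  // An illustrative worked example (editor's note; the type names and ids are
+  // hypothetical): if Message extends Artifact and EmailMessage extends
+  // Message, the closure rules above yield
+  //
+  //   schema_subtype_id_map_[artifact_id]      == {artifact_id, message_id,
+  //                                                email_message_id}
+  //   schema_subtype_id_map_[message_id]       == {message_id,
+  //                                                email_message_id}
+  //   schema_subtype_id_map_[email_message_id] == {email_message_id}
+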
+  // Manager of metadata related to sections (indexable properties) and
+  // joinable properties for all Schemas.
+ std::unique_ptr<const SchemaTypeManager> schema_type_manager_;
+
+ std::unique_ptr<Header> header_;
};
} // namespace lib
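
Before the test-file changes below, a minimal end-to-end sketch of the updated
API that those tests exercise may help orient the reader. This is an editor's
illustration, not part of the patch; the helper name, the error handling, and
the assumption that ICING_ASSIGN_OR_RETURN and the schema-builder constants
are usable outside test code are all hypothetical:

  #include <memory>
  #include <string>

  #include "icing/text_classifier/lib3/utils/base/status.h"
  #include "icing/absl_ports/canonical_errors.h"
  #include "icing/file/filesystem.h"
  #include "icing/schema-builder.h"
  #include "icing/schema/schema-store.h"
  #include "icing/util/clock.h"
  #include "icing/util/status-macros.h"

  namespace icing {
  namespace lib {

  // Creates a store, sets a one-type schema, and surfaces an incompatible
  // result as an error. Mirrors the sequence the tests below perform.
  libtextclassifier3::Status SetUpEmailSchema(const Filesystem& filesystem,
                                              const std::string& schema_dir,
                                              const Clock* clock) {
    // The factory now takes a Clock alongside the filesystem and directory;
    // an InitializeStatsProto out-param is optional.
    ICING_ASSIGN_OR_RETURN(
        std::unique_ptr<SchemaStore> store,
        SchemaStore::Create(&filesystem, schema_dir, clock));

    SchemaProto schema =
        SchemaBuilder()
            .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
                PropertyConfigBuilder()
                    .SetName("subject")
                    .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                    .SetCardinality(CARDINALITY_OPTIONAL)))
            .Build();

    // SetSchema now takes both policy flags explicitly.
    ICING_ASSIGN_OR_RETURN(
        SchemaStore::SetSchemaResult result,
        store->SetSchema(schema,
                         /*ignore_errors_and_delete_documents=*/false,
                         /*allow_circular_schema_definitions=*/false));
    return result.success ? libtextclassifier3::Status::OK
                          : absl_ports::FailedPreconditionError(
                                "schema change was incompatible");
  }

  }  // namespace lib
  }  // namespace icing
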
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 957fd89..8cc7008 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -18,19 +18,30 @@
#include <string>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/portable/equals-proto.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
namespace icing {
namespace lib {
@@ -40,51 +51,166 @@ namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
using ::testing::Not;
using ::testing::Pointee;
+using ::testing::Return;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
+
+constexpr int64_t kDefaultTimestamp = 12345678;
class SchemaStoreTest : public ::testing::Test {
protected:
- SchemaStoreTest() : test_dir_(GetTestTempDir() + "/icing") {
- filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
-
- auto type = schema_.add_types();
- type->set_schema_type("email");
-
- // Add an indexed property so we generate section metadata on it
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ schema_ = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ // Add an indexed property so we generate
+ // section metadata on it
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
}
void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+    // Check that the schema store directory is the *only* directory in
+    // test_dir_. In other words, ensure that all temporary directories have
+    // been properly cleaned up.
+ std::vector<std::string> sub_dirs;
+ ASSERT_TRUE(filesystem_.ListDirectory(test_dir_.c_str(), &sub_dirs));
+ ASSERT_THAT(sub_dirs, ElementsAre("schema_store"));
+
+ // Finally, clean everything up.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
}
- const Filesystem filesystem_;
- const std::string test_dir_;
+ Filesystem filesystem_;
+ std::string test_dir_;
+ std::string schema_store_dir_;
SchemaProto schema_;
+ FakeClock fake_clock_;
};
TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, test_dir_),
+ EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, schema_store_dir_,
+ &fake_clock_),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
+TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
+ // Create an instance of SchemaStore.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum,
+ schema_store->ComputeChecksum());
+
+ // Move construct an instance of SchemaStore
+ SchemaStore move_constructed_schema_store(std::move(*schema_store));
+ EXPECT_THAT(move_constructed_schema_store.GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ EXPECT_THAT(move_constructed_schema_store.ComputeChecksum(),
+ IsOkAndHolds(Eq(expected_checksum)));
+ SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN,
+ TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN,
+ "prop1");
+ EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("type_a"),
+ IsOkAndHolds(Pointee(ElementsAre(expected_metadata))));
+}
+
+TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
+ // Create an instance of SchemaStore.
+ SchemaProto schema1 =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema1, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum,
+ schema_store->ComputeChecksum());
+
+ // Construct another instance of SchemaStore
+ SchemaProto schema2 =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("type_b").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> move_assigned_schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema2, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Move assign the first instance into the second one.
+ *move_assigned_schema_store = std::move(*schema_store);
+ EXPECT_THAT(move_assigned_schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema1))));
+ EXPECT_THAT(move_assigned_schema_store->ComputeChecksum(),
+ IsOkAndHolds(Eq(expected_checksum)));
+ SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN,
+ TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN,
+ "prop1");
+ EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("type_a"),
+ IsOkAndHolds(Pointee(ElementsAre(expected_metadata))));
+}
+
TEST_F(SchemaStoreTest, CorruptSchemaError) {
{
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -94,30 +220,37 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) {
// "Corrupt" the ground truth schema by adding new data to it. This will mess
// up the checksum of the schema store
- SchemaProto corrupt_schema;
- auto type = corrupt_schema.add_types();
- type->set_schema_type("corrupted");
+ SchemaProto corrupt_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("corrupted"))
+ .Build();
- const std::string schema_file = absl_ports::StrCat(test_dir_, "/schema.pb");
+ const std::string schema_file =
+ absl_ports::StrCat(schema_store_dir_, "/schema.pb");
const std::string serialized_schema = corrupt_schema.SerializeAsString();
filesystem_.Write(schema_file.c_str(), serialized_schema.data(),
serialized_schema.size());
// If ground truth was corrupted, we won't know what to do
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
{
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -131,11 +264,59 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
// regenerated from ground truth
const std::string schema_type_mapper_dir =
- absl_ports::StrCat(test_dir_, "/schema_type_mapper");
+ absl_ports::StrCat(schema_store_dir_, "/schema_type_mapper");
filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ InitializeStatsProto initialize_stats;
+ fake_clock_.SetTimerElapsedMilliseconds(123);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123));
+
+ // Everything looks fine, ground truth and derived data
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+ EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+}
+
+TEST_F(SchemaStoreTest, RecoverDiscardDerivedFilesOk) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Set it for the first time
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+ EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+ }
+
+ ICING_ASSERT_OK(
+ SchemaStore::DiscardDerivedFiles(&filesystem_, schema_store_dir_));
+
+ InitializeStatsProto initialize_stats;
+ fake_clock_.SetTimerElapsedMilliseconds(123);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123));
// Everything looks fine, ground truth and derived data
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -146,13 +327,17 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
{
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -165,15 +350,16 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
// the recalculated checksum on initialization. This will force a regeneration
// of derived files from ground truth.
const std::string header_file =
- absl_ports::StrCat(test_dir_, "/schema_store_header");
- SchemaStore::Header header;
+ absl_ports::StrCat(schema_store_dir_, "/schema_store_header");
+ SchemaStore::LegacyHeader header;
header.magic = SchemaStore::Header::kMagic;
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Everything looks fine, ground truth and derived data
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -183,70 +369,125 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
}
TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // The APIs to retrieve information about the schema should fail gracefully.
+ EXPECT_THAT(store->GetSchema(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(store->GetSchemaTypeConfig("foo"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetSchemaTypeId("foo"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetSectionMetadata(/*schema_type_id=*/0, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetJoinablePropertyMetadata(/*schema_type_id=*/0,
+ /*property_path=*/"A"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+  // The APIs to extract content from a document should fail gracefully.
+ DocumentProto doc;
+ PropertyProto* prop = doc.add_properties();
+ prop->set_name("name");
+ prop->add_string_values("foo bar baz");
+
+ EXPECT_THAT(store->ExtractSections(doc),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->ExtractJoinableProperties(doc),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+  // The APIs to persist and checksum data should succeed.
+ EXPECT_THAT(store->ComputeChecksum(), IsOkAndHolds(Crc32()));
+ EXPECT_THAT(store->PersistToDisk(), IsOk());
}
TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_), IsOk());
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ IsOk());
}
TEST_F(SchemaStoreTest, MultipleCreateOk) {
DocumentProto document;
document.set_schema("email");
- auto properties = document.add_properties();
- properties->set_name("subject");
- properties->add_string_values("subject_content");
+ auto subject_property = document.add_properties();
+ subject_property->set_name("subject");
+ subject_property->add_string_values("subject_content");
+ auto timestamp_property = document.add_properties();
+ timestamp_property->set_name("timestamp");
+ timestamp_property->add_int64_values(kDefaultTimestamp);
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
// Verify that our in-memory structures are ok
EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"),
IsOkAndHolds(Pointee(EqualsProto(schema_.types(0)))));
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<Section> sections,
+ ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
schema_store->ExtractSections(document));
- EXPECT_THAT(sections[0].content, ElementsAre("subject_content"));
+ EXPECT_THAT(section_group.string_sections[0].content,
+ ElementsAre("subject_content"));
+ EXPECT_THAT(section_group.integer_sections[0].content,
+ ElementsAre(kDefaultTimestamp));
// Verify that our persisted data is ok
EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
schema_store.reset();
- ICING_ASSERT_OK_AND_ASSIGN(schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Verify that our in-memory structures are ok
EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"),
IsOkAndHolds(Pointee(EqualsProto(schema_.types(0)))));
- ICING_ASSERT_OK_AND_ASSIGN(sections, schema_store->ExtractSections(document));
- EXPECT_THAT(sections[0].content, ElementsAre("subject_content"));
+ ICING_ASSERT_OK_AND_ASSIGN(section_group,
+ schema_store->ExtractSections(document));
+ EXPECT_THAT(section_group.string_sections[0].content,
+ ElementsAre("subject_content"));
+ EXPECT_THAT(section_group.integer_sections[0].content,
+ ElementsAre(kDefaultTimestamp));
// Verify that our persisted data is ok
EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
}
TEST_F(SchemaStoreTest, SetNewSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -254,33 +495,45 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) {
}
TEST_F(SchemaStoreTest, SetSameSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema_));
// And one more for fun
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema_));
}
TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -290,55 +543,73 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
schema_.clear_types();
// Set the incompatible schema
+ result = SchemaStore::SetSchemaResult();
result.success = false;
result.schema_types_deleted_by_name.emplace("email");
result.schema_types_deleted_by_id.emplace(0);
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
}
TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result.schema_types_new_by_name.insert("email");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Add a type, shouldn't affect the index or cached SchemaTypeIds
- type = schema.add_types();
- type->set_schema_type("new_type");
+ schema = SchemaBuilder(schema)
+ .AddType(SchemaTypeConfigBuilder().SetType("new_type"))
+ .Build();
// Set the compatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_new_by_name.insert("new_type");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("message");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -350,9 +621,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
schema_store->GetSchemaTypeId("message"));
// Remove "email" type, this also changes previous SchemaTypeIds
- schema.Clear();
- type = schema.add_types();
- type->set_schema_type("message");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
SchemaStore::SetSchemaResult incompatible_result;
incompatible_result.success = false;
@@ -363,7 +634,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
old_email_schema_type_id);
// Can't set the incompatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
SchemaStore::SetSchemaResult force_result;
@@ -374,109 +647,208 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
// Force set the incompatible schema
EXPECT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(force_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("message");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Reorder the types
- schema.clear_types();
- type = schema.add_types();
- type->set_schema_type("message");
- type = schema.add_types();
- type->set_schema_type("email");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
// Since we assign SchemaTypeIds based on order in the SchemaProto, this will
// cause SchemaTypeIds to change
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
result.old_schema_type_ids_changed.emplace(0); // Old SchemaTypeId of "email"
result.old_schema_type_ids_changed.emplace(
1); // Old SchemaTypeId of "message"
// Set the compatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
-TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
-
- // Add an unindexed property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ // Add an unindexed property
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result.schema_types_new_by_name.insert("email");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Make a previously unindexed property indexed
- property = schema.mutable_types(0)->mutable_properties(0);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- // With a new indexed property, we'll need to reindex
- result.index_incompatible = true;
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Set the compatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_index_incompatible_by_name.insert("email");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
-TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // Make two schemas: one that sets index_nested_properties to false and one
+  // that sets it to true.
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto no_nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument("email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument("email",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Set schema with index_nested_properties=false to start.
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("person");
+ EXPECT_THAT(
+ schema_store->SetSchema(no_nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
+ // Set schema with index_nested_properties=true and confirm that the change to
+ // 'person' is index incompatible.
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_index_incompatible_by_name.insert("person");
+ EXPECT_THAT(
+ schema_store->SetSchema(nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema));
- // Add a STRING property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ // Set schema with index_nested_properties=false and confirm that the change
+ // to 'person' is index incompatible.
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_index_incompatible_by_name.insert("person");
+ EXPECT_THAT(
+ schema_store->SetSchema(no_nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ // Add a STRING property
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema),
+ result.schema_types_new_by_name.insert("email");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -486,8 +858,14 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
schema_store->GetSchemaTypeId("email"));
// Make a previously STRING property into DOUBLE
- property = schema.mutable_types(0)->mutable_properties(0);
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+                   // Add a DOUBLE property
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaStore::SetSchemaResult incompatible_result;
incompatible_result.success = false;
@@ -496,7 +874,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
old_email_schema_type_id);
// Can't set the incompatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
SchemaStore::SetSchemaResult force_result;
@@ -507,15 +887,338 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
// Force set the incompatible schema
EXPECT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(force_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
+TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // 1. Create a ContactPoint type with a repeated property and set that schema
+ SchemaTypeConfigBuilder contact_point_repeated_label =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point_repeated_label).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_contact_point_type_id,
+ schema_store->GetSchemaTypeId("ContactPoint"));
+
+  // 2. Create a type that references the ContactPoint type and make a
+  // backwards-incompatible change to ContactPoint
+ SchemaTypeConfigBuilder contact_point_optional_label =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto new_schema = SchemaBuilder()
+ .AddType(contact_point_optional_label)
+ .AddType(person)
+ .Build();
+
+ // 3. SetSchema should fail with ignore_errors_and_delete_documents=false and
+ // the old schema should remain
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = false;
+ expected_result.schema_types_incompatible_by_name.insert("ContactPoint");
+ expected_result.schema_types_incompatible_by_id.insert(
+ old_contact_point_type_id);
+ expected_result.schema_types_new_by_name.insert("Person");
+ EXPECT_THAT(
+ schema_store->SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(old_schema));
+
+ // 4. SetSchema should succeed with ignore_errors_and_delete_documents=true
+ // and the new schema should be set
+ expected_result.success = true;
+ EXPECT_THAT(
+ schema_store->SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // 1. Create a ContactPoint type whose label property matches on prefix, and
+  // set that schema
+ SchemaTypeConfigBuilder contact_point_prefix_label =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point_prefix_label).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+  // 2. Create a type that references the ContactPoint type and make an index
+  // backwards-incompatible change to ContactPoint
+ SchemaTypeConfigBuilder contact_point_exact_label =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto new_schema = SchemaBuilder()
+ .AddType(contact_point_exact_label)
+ .AddType(person)
+ .Build();
+
+ // SetSchema should succeed, and only ContactPoint should be in
+ // schema_types_index_incompatible_by_name.
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = true;
+ expected_result.schema_types_index_incompatible_by_name.insert(
+ "ContactPoint");
+ expected_result.schema_types_new_by_name.insert("Person");
+ EXPECT_THAT(
+ schema_store->SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // 1. Create a ContactPoint type with an optional property and set that schema
+ SchemaTypeConfigBuilder contact_point_optional_label =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point_optional_label).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+  // 2. Create a type that references the ContactPoint type and make a
+  // backwards-compatible change to ContactPoint
+ SchemaTypeConfigBuilder contact_point_repeated_label =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto new_schema = SchemaBuilder()
+ .AddType(contact_point_repeated_label)
+ .AddType(person)
+ .Build();
+
+ // 3. SetSchema should succeed, and only ContactPoint should be in
+ // schema_types_changed_fully_compatible_by_name.
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = true;
+ expected_result.schema_types_changed_fully_compatible_by_name.insert(
+ "ContactPoint");
+ expected_result.schema_types_new_by_name.insert("Person");
+ EXPECT_THAT(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithAddedIndexableNestedTypeOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // 1. Create a ContactPoint type with a repeated property, and a type that
+  // references the ContactPoint type.
+ SchemaTypeConfigBuilder contact_point =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point).AddType(person).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // 2. Add another nested document property to "Person" that has type
+ // "ContactPoint"
+ SchemaTypeConfigBuilder new_person =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("anotherContactPoint")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED));
+ SchemaProto new_schema =
+ SchemaBuilder().AddType(contact_point).AddType(new_person).Build();
+
+ // 3. Set to new schema. "Person" should be index-incompatible since we need
+ // to index an additional property: 'anotherContactPoint.label'.
+ // - "Person" is also considered join-incompatible since the added nested
+ // document property could also contain a joinable property.
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = true;
+ expected_result.schema_types_index_incompatible_by_name.insert("Person");
+ expected_result.schema_types_join_incompatible_by_name.insert("Person");
+
+ EXPECT_THAT(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithAddedJoinableNestedTypeOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // 1. Create a ContactPoint type with a required, joinable property, and a
+  // type that references the ContactPoint type.
+ SchemaTypeConfigBuilder contact_point =
+ SchemaTypeConfigBuilder()
+ .SetType("ContactPoint")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("label")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED));
+ SchemaTypeConfigBuilder person =
+ SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ SchemaProto old_schema =
+ SchemaBuilder().AddType(contact_point).AddType(person).Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // 2. Add another nested document property to "Person" that has type
+ // "ContactPoint", but make it non-indexable
+ SchemaTypeConfigBuilder new_person =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("contactPoints")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("anotherContactPoint")
+ .SetDataTypeDocument("ContactPoint",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL));
+ SchemaProto new_schema =
+ SchemaBuilder().AddType(contact_point).AddType(new_person).Build();
+
+ // 3. Set to new schema. "Person" should be join-incompatible but
+ // index-compatible.
+ SchemaStore::SetSchemaResult expected_result;
+ expected_result.success = true;
+ expected_result.schema_types_join_incompatible_by_name.insert("Person");
+
+ EXPECT_THAT(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
+}
+
TEST_F(SchemaStoreTest, GetSchemaTypeId) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
schema_.clear_types();
@@ -531,7 +1234,11 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ result.schema_types_new_by_name.insert(first_type);
+ result.schema_types_new_by_name.insert(second_type);
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
EXPECT_THAT(schema_store->GetSchemaTypeId(first_type), IsOkAndHolds(0));
@@ -539,22 +1246,25 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) {
}
TEST_F(SchemaStoreTest, ComputeChecksumDefaultOnEmptySchemaStore) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
Crc32 default_checksum;
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(default_checksum));
}
TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto foo_schema;
- auto type_config = foo_schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto foo_schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -563,66 +1273,76 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) {
}
TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto foo_schema;
- auto type_config = foo_schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto foo_schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
// Destroy the previous instance and recreate SchemaStore
schema_store.reset();
- ICING_ASSERT_OK_AND_ASSIGN(schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto foo_schema;
- auto type_config = foo_schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto foo_schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
// Modifying the SchemaStore changes the checksum
- SchemaProto foo_bar_schema;
- type_config = foo_bar_schema.add_types();
- type_config->set_schema_type("foo");
- type_config = foo_bar_schema.add_types();
- type_config->set_schema_type("bar");
+ SchemaProto foo_bar_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("foo"))
+ .AddType(SchemaTypeConfigBuilder().SetType("bar"))
+ .Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_bar_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
}
TEST_F(SchemaStoreTest, PersistToDiskFineForEmptySchemaStore) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
// Persisting is fine and shouldn't affect anything
ICING_EXPECT_OK(schema_store->PersistToDisk());
}
TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Persisting shouldn't change anything
ICING_EXPECT_OK(schema_store->PersistToDisk());
@@ -632,20 +1352,1843 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Modify the schema so that something different is persisted next time
- type_config = schema.add_types();
- type_config->set_schema_type("bar");
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ schema = SchemaBuilder(schema)
+ .AddType(SchemaTypeConfigBuilder().SetType("bar"))
+ .Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Should also persist on destruction
schema_store.reset();
// And we get the same schema back on reinitialization
- ICING_ASSERT_OK_AND_ASSIGN(schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
+TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Create a schema with two types: one simple type and one type that uses all
+ // 64 sections.
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ SchemaTypeConfigBuilder full_sections_type_builder =
+ SchemaTypeConfigBuilder().SetType("fullSectionsType");
+ for (int i = 0; i < 64; ++i) {
+ full_sections_type_builder.AddProperty(
+ PropertyConfigBuilder(prop).SetName("prop" + std::to_string(i)));
+ }
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder(prop)))
+ .AddType(full_sections_type_builder)
+ .Build();
+
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("fullSectionsType");
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+
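+  // "email" defines one indexed property and "fullSectionsType" defines 64,
+  // so we expect 65 total sections, with only "fullSectionsType" hitting the
+  // per-type section limit.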
+ SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.schema_store_size(), Ge(0));
+ EXPECT_THAT(storage_info.num_schema_types(), Eq(2));
+ EXPECT_THAT(storage_info.num_total_sections(), Eq(65));
+ EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1));
+}
+
+TEST_F(SchemaStoreTest, GetDebugInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Set schema
+ ASSERT_THAT(
+ schema_store->SetSchema(schema_,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{
+ .success = true,
+ .schema_types_new_by_name = {schema_.types(0).schema_type()}})));
+
+ // Check debug info
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out,
+ schema_store->GetDebugInfo());
+ EXPECT_THAT(out.schema(), EqualsProto(schema_));
+ EXPECT_THAT(out.crc(), Gt(0));
+}
+
+TEST_F(SchemaStoreTest, GetDebugInfoForEmptySchemaStore) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Check debug info before setting a schema
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out,
+ schema_store->GetDebugInfo());
+ SchemaDebugInfoProto expected_out;
+ expected_out.set_crc(0);
+ EXPECT_THAT(out, EqualsProto(expected_out));
+}
+
+TEST_F(SchemaStoreTest, InitializeRegenerateDerivedFilesFailure) {
+  // This test covers the first point at which RegenerateDerivedFiles could
+  // fail. This should simply result in SchemaStore::Create returning an
+  // INTERNAL error.
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Type"))
+ .Build();
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem,
+ CreateDirectoryRecursively(HasSubstr("key_mapper_dir")))
+ .WillByDefault(Return(false));
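+  // Failing to create the key mapper directory makes derived-file
+  // regeneration fail during SchemaStore::Create, which should surface as an
+  // INTERNAL error.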
+ {
+ EXPECT_THAT(SchemaStore::Create(mock_filesystem.get(), schema_store_dir_,
+ &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
+  // This test covers the second point at which RegenerateDerivedFiles could fail.
+ // If handled correctly, the schema store and section manager should still be
+ // in the original, valid state.
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("Type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("intProp1")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("stringProp1")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(mock_filesystem.get(), schema_store_dir_,
+ &fake_clock_));
+
+ ON_CALL(*mock_filesystem,
+ CreateDirectoryRecursively(HasSubstr("key_mapper_dir")))
+ .WillByDefault(Return(false));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(type)
+ .AddType(SchemaTypeConfigBuilder().SetType("Type2"))
+ .Build();
+ EXPECT_THAT(
+ schema_store->SetSchema(std::move(schema),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
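+    // Even though SetSchema failed, the store should remain in the original,
+    // valid state: extracting sections from a "Type" document still works.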
+ DocumentProto document =
+ DocumentBuilder()
+ .SetSchema("Type")
+ .AddInt64Property("intProp1", 1, 2, 3)
+ .AddStringProperty("stringProp1", "foo bar baz")
+ .Build();
+ SectionMetadata expected_int_prop1_metadata(
+ /*id_in=*/0, TYPE_INT64, TOKENIZER_NONE, TERM_MATCH_UNKNOWN,
+ NUMERIC_MATCH_RANGE, "intProp1");
+ SectionMetadata expected_string_prop1_metadata(
+ /*id_in=*/1, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
+ NUMERIC_MATCH_UNKNOWN, "stringProp1");
+ ICING_ASSERT_OK_AND_ASSIGN(SectionGroup section_group,
+ schema_store->ExtractSections(document));
+ ASSERT_THAT(section_group.string_sections, SizeIs(1));
+ EXPECT_THAT(section_group.string_sections.at(0).metadata,
+ Eq(expected_string_prop1_metadata));
+ EXPECT_THAT(section_group.string_sections.at(0).content,
+ ElementsAre("foo bar baz"));
+ ASSERT_THAT(section_group.integer_sections, SizeIs(1));
+ EXPECT_THAT(section_group.integer_sections.at(0).metadata,
+ Eq(expected_int_prop1_metadata));
+ EXPECT_THAT(section_group.integer_sections.at(0).content,
+ ElementsAre(1, 2, 3));
+ }
+}
+
+TEST_F(SchemaStoreTest, CanCheckForPropertiesDefinedInSchema) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Set it for the first time
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+  result.schema_types_new_by_name.insert("email");
+
+ // Don't use schema_ defined in the test suite, as we want to make sure that
+ // the test is written correctly without referring to what the suite has
+ // defined.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ // Add an indexed property so we generate
+ // section metadata on it
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId schema_id,
+ schema_store->GetSchemaTypeId("email"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "timestamp"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(schema_id, "foobar"));
+}
+
+TEST_F(SchemaStoreTest, GetSchemaTypeIdsWithChildren) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Create a schema with the following inheritance relation:
+  //       A
+  //      / \
+  //     B   E
+  //    / \
+  //   C   D
+  //       |
+  //       F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build();
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Get schema type id for each type.
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+ schema_store->GetSchemaTypeId("A"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+ schema_store->GetSchemaTypeId("B"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_c_id,
+ schema_store->GetSchemaTypeId("C"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_d_id,
+ schema_store->GetSchemaTypeId("D"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_e_id,
+ schema_store->GetSchemaTypeId("E"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_f_id,
+ schema_store->GetSchemaTypeId("F"));
+
+ // Check the results from GetSchemaTypeIdsWithChildren
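+  // Each lookup should return the type's own id plus the ids of all of its
+  // transitive children.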
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("A"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_a_id, type_b_id, type_c_id, type_d_id, type_e_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("B"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_b_id, type_c_id, type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("C"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_c_id))));
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("D"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_e_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_f_id))));
+}
+
+TEST_F(SchemaStoreTest, DiamondGetSchemaTypeIdsWithChildren) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Create a schema with the following inheritance relation:
+  //       A
+  //      / \
+  //     B   E
+  //    / \ /
+  //   C   D
+  //    \ /
+  //     F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d = SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddParentType("B")
+ .AddParentType("E")
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
+ SchemaTypeConfigProto type_f = SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddParentType("C")
+ .AddParentType("D")
+ .Build();
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Get schema type id for each type.
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+ schema_store->GetSchemaTypeId("A"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+ schema_store->GetSchemaTypeId("B"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_c_id,
+ schema_store->GetSchemaTypeId("C"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_d_id,
+ schema_store->GetSchemaTypeId("D"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_e_id,
+ schema_store->GetSchemaTypeId("E"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_f_id,
+ schema_store->GetSchemaTypeId("F"));
+
+ // Check the results from GetSchemaTypeIdsWithChildren
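+  // With multiple inheritance, a child reachable through several parents
+  // (e.g. F via both C and D) should still appear exactly once per lookup.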
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("A"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_a_id, type_b_id, type_c_id, type_d_id, type_e_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("B"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(
+ type_b_id, type_c_id, type_d_id, type_f_id))));
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("C"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_c_id, type_f_id))));
+ EXPECT_THAT(
+ schema_store->GetSchemaTypeIdsWithChildren("D"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"),
+ IsOkAndHolds(Pointee(
+ UnorderedElementsAre(type_e_id, type_d_id, type_f_id))));
+ EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"),
+ IsOkAndHolds(Pointee(UnorderedElementsAre(type_f_id))));
+}
+
+TEST_F(SchemaStoreTest, IndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipients")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipientIds")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
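+  // "Email" is the only type in the schema, so it is assigned SchemaTypeId 0.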
+
+ // Indexables.
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "senderQualifiedId"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "recipients"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "recipientIds"));
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "timestamp"));
+}
+
+TEST_F(SchemaStoreTest, JoinableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Joinables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "tagQualifiedId"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "senderQualifiedId"));
+}
+
+TEST_F(SchemaStoreTest, NonIndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("attachment")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("nonindexableInteger")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Non-indexables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "attachment"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "nonindexableInteger"));
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "text"));
+}
+
+TEST_F(SchemaStoreTest, NonExistentFieldsAreUndefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("nonindexableInteger")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+
+ // Non-existents.
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "foobar"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId,
+ "timestamp.foo"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "time"));
+}
+
+TEST_F(SchemaStoreTest, NestedIndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Indexables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.timestamp"));
+}
+
+TEST_F(SchemaStoreTest, NestedJoinableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Joinables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.tagQualifiedId"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.tagQualifiedId"));
+}
+
+TEST_F(SchemaStoreTest, NestedNonIndexableFieldsAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Non-indexables.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails.text"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.text"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.timestamp"));
+}
+
+TEST_F(SchemaStoreTest, NestedNonExistentFieldsAreUndefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Non-existents.
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "emails.foobar"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "nestedNonIndexable.foobar"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "emails.timestamp.foo"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(
+ kTypeConversationSchemaId, "emails.time"));
+}
+
+TEST_F(SchemaStoreTest, IntermediateDocumentPropertiesAreDefined) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("tagQualifiedId")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("timestamp")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+
+ SchemaTypeConfigProto conversation_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Conversation")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("nestedNonIndexable")
+ .SetDataTypeDocument("Email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+
+ // Intermediate documents props.
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "emails"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId,
+ "nestedNonIndexable"));
+}
+
+TEST_F(SchemaStoreTest, CyclePathsAreDefined) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeASchemaId = 0;
+ constexpr SchemaTypeId kTypeBSchemaId = 1;
+
+ // A's top-level properties
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b"));
+
+ // A's nested properties in B
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.body"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a"));
+
+ // A's nested properties in B's nested property in A
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.b"));
+
+ // B's top-level properties
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "body"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a"));
+
+ // B's nested properties in A
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.subject"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b"));
+
+ // B's nested properties in A's nested property in B
+ EXPECT_TRUE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.body"));
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.a"));
+}
+
+TEST_F(SchemaStoreTest, WrongTypeCyclePathsAreUndefined) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeASchemaId = 0;
+ constexpr SchemaTypeId kTypeBSchemaId = 1;
+
+ // The same paths as above, but we check the wrong types instead.
+ // A's top-level properties
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "subject"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b"));
+
+ // A's nested properties in B
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.body"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a"));
+
+ // A's nested properties in B's nested property in A
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a.subject"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a.b"));
+
+ // B's top-level properties
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "body"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a"));
+
+ // B's nested properties in A
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.subject"));
+ EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b"));
+
+ // B's nested properties in A's nested property in B
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b.body"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b.a"));
+}
+
+TEST_F(SchemaStoreTest, CyclePathsNonexistentPropertiesAreUndefined) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/true));
+ constexpr SchemaTypeId kTypeASchemaId = 0;
+ constexpr SchemaTypeId kTypeBSchemaId = 1;
+
+ // Undefined paths in A
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.subject"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.body"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.a"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.subject.b"));
+
+ // Undefined paths in B
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.body"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.subject"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.b"));
+ EXPECT_FALSE(
+ schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.body.a"));
+}
+
+TEST_F(SchemaStoreTest, LoadsOverlaySchemaOnInit) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // an overlay schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ {
+    // Create a new instance of the schema store and check that the same
+    // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+
+ // The overlay should exist
+ std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb";
+ ASSERT_TRUE(filesystem_.FileExists(overlay_schema_path.c_str()));
+
+ // The base schema should hold a compatible schema
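+    // i.e. the same schema with the rollback-incompatible RFC822 tokenizer
+    // downgraded to a plain unindexed string property.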
+ SchemaTypeConfigProto modified_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto expected_base_schema =
+ SchemaBuilder().AddType(modified_type_a).AddType(type_b).Build();
+ std::string base_schema_path = schema_store_dir_ + "/schema.pb";
+ auto base_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ filesystem_, base_schema_path);
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* base_schema,
+ base_schema_file_->Read());
+ EXPECT_THAT(*base_schema, EqualsProto(expected_base_schema));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadsBaseSchemaWithNoOverlayOnInit) {
+ // Create a normal schema that won't require an overlay.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+
+ // Additionally, the overlay should not exist
+ std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb";
+ ASSERT_FALSE(filesystem_.FileExists(overlay_schema_path.c_str()));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaBackupSchemaMissing) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the backup schema.
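+  // (When an overlay schema is in use, the base schema.pb serves as the
+  // rollback-compatible backup.)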
+ std::string backup_schema_path = schema_store_dir_ + "/schema.pb";
+ ASSERT_TRUE(filesystem_.DeleteFile(backup_schema_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the backup schema is not available.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaOverlaySchemaMissing) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the overlay schema.
+ std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb";
+ ASSERT_TRUE(filesystem_.DeleteFile(overlay_schema_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the overlay schema is not available when we expected it to be.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaHeaderMissing) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+  // Delete the schema header.
+ std::string schema_header_path = schema_store_dir_ + "/schema_store_header";
+ ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str()));
+
+ {
+    // Create a new instance of the schema store and check that it fails
+    // because the schema header is not available.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaNoOverlayHeaderMissing) {
+ // Create a normal schema that won't require a backup.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the schema header.
+ std::string schema_header_path = schema_store_dir_ + "/schema_store_header";
+ ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the schema header (which is now a part of the ground truth) is not
+ // available.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaCompatibleNoChange) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
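+  // A compatible version change should leave the persisted schema untouched.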
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kCompatible,
+ version_util::kVersion));
+
+ {
+    // Create a new instance of the schema store and check that the same
+    // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaUpgradeNoChange) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
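+  // An upgrade to a newer version should likewise leave the schema untouched.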
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kUpgrade,
+ version_util::kVersion + 1));
+
+ {
+    // Create a new instance of the schema store and check that the same
+    // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaVersionZeroUpgradeNoChange) {
+ // Because we are upgrading from version zero, the schema must be compatible
+ // with version zero.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(
+ SchemaStore::MigrateSchema(&filesystem_, schema_store_dir_,
+ version_util::StateChange::kVersionZeroUpgrade,
+ version_util::kVersion + 1));
+
+ {
+ // Create a new instance of the schema store and check that the same
+ // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest,
+ MigrateSchemaRollbackDiscardsIncompatibleOverlaySchema) {
+ // Create a schema that is rollback incompatible and will trigger us to
+ // create a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to a version before kVersionOne. The schema header will declare
+ // that the overlay is compatible with any version starting with kVersionOne.
+ // So kVersionOne - 1 is incompatible and the overlay schema will be
+ // discarded.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersionOne - 1));
+
+ {
+ // Create a new instance of the schema store and check that we fell back
+ // to the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollbackKeepsCompatibleOverlaySchema) {
+ // Create a schema that is rollback incompatible and will trigger us to
+ // create a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to kVersion. The schema header will declare that the overlay is
+ // compatible with any version starting with kVersion. So we will be
+ // compatible and retain the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion));
+
+ {
+ // Create a new instance of the schema store and check that the same
+ // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsBaseSchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to a version before kVersionOne. The schema header will declare
+ // that the overlay is compatible with any version starting with kVersionOne.
+ // So kVersionOne - 1 is incompatible and the overlay schema will be
+ // discarded.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersionOne - 1));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new instance of the schema store and check that we fell back
+ // to the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+
+ // Now rollforward to a new version. This should accept whatever schema is
+ // present (currently the base schema).
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward,
+ version_util::kVersion));
+ {
+ // Create a new instance of the schema store and check that we fell back
+ // to the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to kVersion. The schema header will declare that the overlay is
+ // compatible with any version starting with kVersion. So we will be
+ // compatible and retain the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion));
+
+ {
+ // Create a new instance of the schema store and check that the same
+ // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Now rollforward to a new version. This should accept whatever schema is
+ // present (currently the overlay schema).
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward,
+ version_util::kVersion));
+ {
+ // Create a new instance of the schema store and check that the same
+ // schema is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest,
+ MigrateSchemaVersionZeroRollforwardDiscardsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // A VersionZeroRollforward will always discard the overlay schema because it
+ // could be stale.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_,
+ version_util::StateChange::kVersionZeroRollForward,
+ version_util::kVersion));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new instance of the schema store and check that we fell back
+ // to the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaVersionUndeterminedDiscardsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // An Undetermined state change will always discard the overlay schema
+ // because it doesn't know which state we're in, so it falls back to the
+ // base schema, which should always be valid.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kUndetermined,
+ version_util::kVersion));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new instance of the schema store and check that we fell back
+ // to the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/schema-type-manager.cc b/icing/schema/schema-type-manager.cc
new file mode 100644
index 0000000..4a6b7f2
--- /dev/null
+++ b/icing/schema/schema-type-manager.cc
@@ -0,0 +1,108 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-type-manager.h"
+
+#include <memory>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-property-iterator.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<SchemaTypeManager>>
+SchemaTypeManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
+ const KeyMapper<SchemaTypeId>* schema_type_mapper) {
+ ICING_RETURN_ERROR_IF_NULL(schema_type_mapper);
+
+ SectionManager::Builder section_manager_builder(*schema_type_mapper);
+ JoinablePropertyManager::Builder joinable_property_manager_builder(
+ *schema_type_mapper);
+
+ for (const auto& [type_config_name, type_config] : type_config_map) {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper->Get(type_config_name));
+
+ // Use the iterator to traverse all leaf properties of the schema type.
+ SchemaPropertyIterator iterator(type_config, type_config_map);
+ while (true) {
+ libtextclassifier3::Status status = iterator.Advance();
+ if (!status.ok()) {
+ if (absl_ports::IsOutOfRange(status)) {
+ break;
+ }
+ return status;
+ }
+
+ // Process section (indexable property)
+ if (iterator.GetCurrentPropertyIndexable()) {
+ ICING_RETURN_IF_ERROR(
+ section_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, iterator.GetCurrentPropertyConfig(),
+ iterator.GetCurrentPropertyPath()));
+ }
+
+ // Process joinable property
+ ICING_RETURN_IF_ERROR(
+ joinable_property_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, iterator.GetCurrentPropertyConfig(),
+ iterator.GetCurrentPropertyPath()));
+ }
+
+ // Process unknown property paths in the indexable_nested_properties_list.
+ // These property paths should consume sectionIds but are currently
+ // not indexed.
+ //
+ // SectionId assignment order:
+ // - We assign section ids to known (existing) properties first, in
+ // alphabetical order.
+ // - After handling all known properties, we assign section ids to all
+ // unknown (non-existent) properties that are specified in the
+ // indexable_nested_properties_list.
+ // - As a result, assignment of the entire section set is not done
+ // alphabetically, but assignment is still deterministic and alphabetical
+ // order is preserved inside the known properties and unknown properties
+ // sets individually.
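+ //
+ // Illustrative example (hypothetical property names): with known
+ // properties "a.foo" and "b.bar" plus unknown properties "z.qux" and
+ // "y.qux" in the list, section ids are assigned to "a.foo", "b.bar",
+ // "y.qux", "z.qux" in that order.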
+ for (const auto& property_path :
+ iterator.unknown_indexable_nested_property_paths()) {
+ PropertyConfigProto unknown_property_config;
+ unknown_property_config.set_property_name(std::string(
+ property_util::SplitPropertyPathExpr(property_path).back()));
+ unknown_property_config.set_data_type(
+ PropertyConfigProto::DataType::UNKNOWN);
+
+ ICING_RETURN_IF_ERROR(
+ section_manager_builder.ProcessSchemaTypePropertyConfig(
+ schema_type_id, unknown_property_config,
+ std::string(property_path)));
+ }
+ }
+
+ return std::unique_ptr<SchemaTypeManager>(new SchemaTypeManager(
+ std::move(section_manager_builder).Build(),
+ std::move(joinable_property_manager_builder).Build()));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-type-manager.h b/icing/schema/schema-type-manager.h
new file mode 100644
index 0000000..f2adbd9
--- /dev/null
+++ b/icing/schema/schema-type-manager.h
@@ -0,0 +1,79 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
+#define ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/schema/joinable-property-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// This class is a wrapper of SectionManager and JoinablePropertyManager.
+class SchemaTypeManager {
+ public:
+ // Schema type ids are contiguous, so we use a vector instead of an
+ // unordered map for the mappings.
+ using SchemaTypeIdToPropertiesVector =
+ std::vector<std::unordered_set<std::string>>;
+ // Factory function to create a SchemaTypeManager which does not take
+ // ownership of any input components, and all pointers must refer to valid
+ // objects that outlive the created SchemaTypeManager instance.
+ //
+ // Returns:
+ // - A SchemaTypeManager on success
+ // - FAILED_PRECONDITION_ERROR on any null pointer input
+ // - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema
+ // exceeds the threshold (kTotalNumSections, kTotalNumJoinableProperties)
+ // - INVALID_ARGUMENT_ERROR if type_config_map contains incorrect
+ // information that causes errors (e.g. invalid schema type id, cycle
+ // dependency in nested schema)
+ // - NOT_FOUND_ERROR if any nested schema name is not found in
+ // type_config_map
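+ //
+ // Example usage (a sketch; assumes `type_config_map` and
+ // `schema_type_mapper` have already been populated, as in
+ // schema-type-manager_test.cc):
+ //   ICING_ASSIGN_OR_RETURN(
+ //       std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ //       SchemaTypeManager::Create(type_config_map,
+ //                                 schema_type_mapper.get()));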
+ static libtextclassifier3::StatusOr<std::unique_ptr<SchemaTypeManager>>
+ Create(const SchemaUtil::TypeConfigMap& type_config_map,
+ const KeyMapper<SchemaTypeId>* schema_type_mapper);
+
+ const SectionManager& section_manager() const { return *section_manager_; }
+
+ const JoinablePropertyManager& joinable_property_manager() const {
+ return *joinable_property_manager_;
+ }
+
+ private:
+ explicit SchemaTypeManager(
+ std::unique_ptr<SectionManager> section_manager,
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager)
+ : section_manager_(std::move(section_manager)),
+ joinable_property_manager_(std::move(joinable_property_manager)) {}
+
+ std::unique_ptr<SectionManager> section_manager_;
+
+ std::unique_ptr<JoinablePropertyManager> joinable_property_manager_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
diff --git a/icing/schema/schema-type-manager_test.cc b/icing/schema/schema-type-manager_test.cc
new file mode 100644
index 0000000..eafc612
--- /dev/null
+++ b/icing/schema/schema-type-manager_test.cc
@@ -0,0 +1,356 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-type-manager.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Pointee;
+
+// type and property names of EmailMessage
+static constexpr char kTypeEmail[] = "EmailMessage";
+static constexpr SchemaTypeId kTypeEmailSchemaId = 0;
+// indexable (in lexicographical order)
+static constexpr char kPropertyRecipientIds[] = "recipientIds";
+static constexpr char kPropertyRecipients[] = "recipients";
+static constexpr char kPropertySenderQualifiedId[] =
+ "senderQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertySubject[] = "subject";
+static constexpr char kPropertyTimestamp[] = "timestamp";
+// non-indexable
+static constexpr char kPropertyAttachment[] = "attachment";
+static constexpr char kPropertyNonIndexableInteger[] = "nonIndexableInteger";
+static constexpr char kPropertyTagQualifiedId[] =
+ "tagQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertyText[] = "text";
+
+// type and property names of Conversation
+static constexpr char kTypeConversation[] = "Conversation";
+static constexpr SchemaTypeId kTypeConversationSchemaId = 1;
+// indexable (in lexicographical order)
+static constexpr char kPropertyEmails[] = "emails";
+static constexpr char kPropertyGroupQualifiedId[] =
+ "groupQualifiedId"; // QUALIFIED_ID joinable
+static constexpr char kPropertyName[] = "name";
+// non-indexable
+static constexpr char kPropertyNestedNonIndexable[] = "nestedNonIndexable";
+static constexpr char kPropertySuperTagQualifiedId[] =
+ "superTagQualifiedId"; // QUALIFIED_ID joinable
+
+PropertyConfigProto CreateRecipientIdsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipientIds)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateRecipientsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateSenderQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySenderQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateSubjectPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTimestampPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTagQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTagQualifiedId)
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupQualifiedId)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateSuperTagQualifiedIdPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySuperTagQualifiedId)
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(CreateTagQualifiedIdPropertyConfig())
+ .AddProperty(CreateSubjectPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(CreateSenderQualifiedIdPropertyConfig())
+ .AddProperty(CreateRecipientsPropertyConfig())
+ .AddProperty(CreateRecipientIdsPropertyConfig())
+ .AddProperty(CreateTimestampPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNonIndexableInteger)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(CreateSuperTagQualifiedIdPropertyConfig())
+ .AddProperty(CreateNamePropertyConfig())
+ .AddProperty(CreateGroupQualifiedIdPropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNestedNonIndexable)
+ .SetDataTypeDocument(kTypeEmail,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
+class SchemaTypeManagerTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(SchemaTypeManagerTest, Create) {
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(kTypeEmail, CreateEmailTypeConfig());
+ type_config_map.emplace(kTypeConversation, CreateConversationTypeConfig());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, kTypeEmailSchemaId));
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(kTypeConversation, kTypeConversationSchemaId));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Check SectionManager
+ // In the Email type, "recipientIds", "recipients", "senderQualifiedId",
+ // "subject" and "timestamp" are indexable properties. "attachment",
+ // "nonIndexableInteger", "tagQualifiedId" and "text" are non-indexable.
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetMetadataList(kTypeEmail),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"subject",
+ CreateSubjectPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/4,
+ /*expected_property_path=*/"timestamp",
+ CreateTimestampPropertyConfig())))));
+
+ // In the Conversation type, "groupQualifiedId" and "name" are indexable
+ // properties, as are the indexable properties of the email in the "emails"
+ // property. None of the properties of the email in the "nestedNonIndexable"
+ // property are indexable.
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetMetadataList(kTypeConversation),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()),
+ EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/4,
+ /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/5,
+ /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()),
+ EqualsSectionMetadata(/*expected_id=*/6,
+ /*expected_property_path=*/"name",
+ CreateNamePropertyConfig())))));
+
+ // Check JoinablePropertyManager
+ // In the Email type, "senderQualifiedId" and "tagQualifiedId" are joinable
+ // properties.
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager().GetMetadataList(
+ kTypeEmail),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig())))));
+ // In the Conversation type, "groupQualifiedId" and "superTagQualifiedId" are
+ // joinable properties, as are the joinable properties of the email in the
+ // "emails" and "nestedNonIndexable" properties.
+ EXPECT_THAT(
+ schema_type_manager->joinable_property_manager().GetMetadataList(
+ kTypeConversation),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"emails.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"emails.tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"groupQualifiedId",
+ CreateGroupQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"nestedNonIndexable.senderQualifiedId",
+ CreateSenderQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/4,
+ /*expected_property_path=*/"nestedNonIndexable.tagQualifiedId",
+ CreateTagQualifiedIdPropertyConfig()),
+ EqualsJoinablePropertyMetadata(
+ /*expected_id=*/5,
+ /*expected_property_path=*/"superTagQualifiedId",
+ CreateSuperTagQualifiedIdPropertyConfig())))));
+}
+
+TEST_F(SchemaTypeManagerTest, CreateWithNullPointerShouldFail) {
+ SchemaUtil::TypeConfigMap type_config_map;
+ EXPECT_THAT(SchemaTypeManager::Create(type_config_map,
+ /*schema_type_mapper=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(SchemaTypeManagerTest, CreateWithSchemaNotInSchemaTypeMapperShouldFail) {
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("type");
+
+ auto property = type_config.add_properties();
+ property->set_property_name("property");
+ property->set_data_type(TYPE_STRING);
+ property->set_cardinality(CARDINALITY_REQUIRED);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type", type_config);
+
+ // Create an empty schema type mapper
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+
+ EXPECT_THAT(
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 7413d73..72287a8 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -14,19 +14,21 @@
#include "icing/schema/schema-util.h"
+#include <algorithm>
#include <cstdint>
+#include <queue>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/annotate.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/absl_ports/str_join.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/util/logging.h"
@@ -37,6 +39,20 @@ namespace lib {
namespace {
+bool ArePropertiesEqual(const PropertyConfigProto& old_property,
+ const PropertyConfigProto& new_property) {
+ return old_property.property_name() == new_property.property_name() &&
+ old_property.data_type() == new_property.data_type() &&
+ old_property.schema_type() == new_property.schema_type() &&
+ old_property.cardinality() == new_property.cardinality() &&
+ old_property.string_indexing_config().term_match_type() ==
+ new_property.string_indexing_config().term_match_type() &&
+ old_property.string_indexing_config().tokenizer_type() ==
+ new_property.string_indexing_config().tokenizer_type() &&
+ old_property.document_indexing_config().index_nested_properties() ==
+ new_property.document_indexing_config().index_nested_properties();
+}
+
bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
const PropertyConfigProto& new_property) {
if (old_property.cardinality() < new_property.cardinality()) {
@@ -87,39 +103,481 @@ bool IsPropertyCompatible(const PropertyConfigProto& old_property,
IsCardinalityCompatible(old_property, new_property);
}
-bool IsTermMatchTypeCompatible(const IndexingConfig& old_indexed,
- const IndexingConfig& new_indexed) {
+bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
+ const StringIndexingConfig& new_indexed) {
return old_indexed.term_match_type() == new_indexed.term_match_type() &&
old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
}
+bool IsIntegerNumericMatchTypeCompatible(
+ const IntegerIndexingConfig& old_indexed,
+ const IntegerIndexingConfig& new_indexed) {
+ return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
+}
+
+bool IsDocumentIndexingCompatible(const DocumentIndexingConfig& old_indexed,
+ const DocumentIndexingConfig& new_indexed) {
+ // TODO(b/265304217): This could mark the new schema as incompatible and
+ // generate some unnecessary index rebuilds if the two schemas have an
+ // equivalent set of indexed properties but declare them differently.
+ if (old_indexed.index_nested_properties() !=
+ new_indexed.index_nested_properties()) {
+ return false;
+ }
+
+ if (old_indexed.indexable_nested_properties_list().size() !=
+ new_indexed.indexable_nested_properties_list().size()) {
+ return false;
+ }
+
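+ // Since the sizes match, the two lists cover the same set of nested
+ // properties iff every entry of the new list appears in the old set
+ // (assuming the lists contain no duplicate entries).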
+ std::unordered_set<std::string_view> old_indexable_nested_properties_set(
+ old_indexed.indexable_nested_properties_list().begin(),
+ old_indexed.indexable_nested_properties_list().end());
+ for (const auto& property : new_indexed.indexable_nested_properties_list()) {
+ if (old_indexable_nested_properties_set.find(property) ==
+ old_indexable_nested_properties_set.end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void AddIncompatibleChangeToDelta(
+ std::unordered_set<std::string>& incompatible_delta,
+ const SchemaTypeConfigProto& old_type_config,
+ const SchemaUtil::DependentMap& new_schema_dependent_map,
+ const SchemaUtil::TypeConfigMap& old_type_config_map,
+ const SchemaUtil::TypeConfigMap& new_type_config_map) {
+ // If this type is incompatible, then every type that depends on it might
+ // also be incompatible. Use the dependent map to mark those ones as
+ // incompatible too.
+ incompatible_delta.insert(old_type_config.schema_type());
+ auto dependent_types_itr =
+ new_schema_dependent_map.find(old_type_config.schema_type());
+ if (dependent_types_itr != new_schema_dependent_map.end()) {
+ for (const auto& [dependent_type, _] : dependent_types_itr->second) {
+ // The types from new_schema that depend on the current
+ // old_type_config may not be present in old_schema.
+ // Those types will be listed in schema_delta.schema_types_new
+ // instead.
+ std::string dependent_type_str(dependent_type);
+ if (old_type_config_map.find(dependent_type_str) !=
+ old_type_config_map.end()) {
+ incompatible_delta.insert(std::move(dependent_type_str));
+ }
+ }
+ }
+}
+
+// Returns whether C1 <= C2 based on the following rule, where C1 and C2 are
+// cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED.
+//
+// Rule: REQUIRED < OPTIONAL < REPEATED
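+//
+// e.g. CardinalityLessThanEq(REQUIRED, REPEATED) is true, while
+// CardinalityLessThanEq(REPEATED, OPTIONAL) is false.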
+bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
+ PropertyConfigProto::Cardinality::Code C2) {
+ if (C1 == C2) {
+ return true;
+ }
+ if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
+ return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
+ C2 == PropertyConfigProto::Cardinality::REPEATED;
+ }
+ if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
+ return C2 == PropertyConfigProto::Cardinality::REPEATED;
+ }
+ return false;
+}
+
+// Check if set1 is a subset of set2.
+template <typename T>
+bool IsSubset(const std::unordered_set<T>& set1,
+ const std::unordered_set<T>& set2) {
+ for (const auto& item : set1) {
+ if (set2.find(item) == set2.end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
} // namespace
-libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
- // Tracks SchemaTypeConfigs that we've validated already.
- std::unordered_set<std::string_view> known_schema_types;
+libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
+ const SchemaUtil::DependentMap& direct_nested_types_map,
+ const std::unordered_set<std::string_view>& joinable_types,
+ std::string_view type, bool path_contains_joinable_property,
+ SchemaUtil::DependentMap* expanded_nested_types_map,
+ std::unordered_map<std::string_view, bool>&&
+ pending_expansion_paths_indexable,
+ std::unordered_set<std::string_view>* sink_types) {
+ // TODO(b/280698121): Implement optimizations to this code to avoid reentering
+ // a node after it's already been expanded.
- // Tracks SchemaTypeConfigs that have been mentioned (by other
- // SchemaTypeConfigs), but we haven't validated yet.
- std::unordered_set<std::string_view> unknown_schema_types;
+ auto itr = direct_nested_types_map.find(type);
+ if (itr == direct_nested_types_map.end()) {
+ // It's a sink node. Just return.
+ sink_types->insert(type);
+ return libtextclassifier3::Status::OK;
+ }
+ std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
+ expanded_relations;
+
+ // Add all of the adjacent outgoing relations.
+ expanded_relations.reserve(itr->second.size());
+ expanded_relations.insert(itr->second.begin(), itr->second.end());
+
+ // Iterate through each adjacent outgoing relation and add their indirect
+ // outgoing relations.
+ for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
+ // Make a copy of pending_expansion_paths_indexable for every iteration.
+ std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
+ pending_expansion_paths_indexable);
+
+ // 1. Check the nested indexable config of the edge (type -> adjacent_type),
+ // and the joinable config of the current path up to adjacent_type.
+ //
+ // The nested indexable config is true if any of the PropertyConfigProtos
+ // representing the connecting edge has index_nested_properties=true.
+ bool is_edge_nested_indexable = std::any_of(
+ adjacent_property_protos.begin(), adjacent_property_protos.end(),
+ [](const PropertyConfigProto* property_config) {
+ return property_config->document_indexing_config()
+ .index_nested_properties();
+ });
+ // TODO(b/265304217): change this once we add joinable_properties_list.
+ // Check if addition of the new edge (type->adjacent_type) makes the path
+ // joinable.
+ bool new_path_contains_joinable_property =
+ joinable_types.count(type) > 0 || path_contains_joinable_property;
+ // Set the is_nested_indexable field for the current edge.
+ pending_expansion_paths_copy[type] = is_edge_nested_indexable;
+
+ // If is_edge_nested_indexable=false, then all paths to adjacent_type
+ // currently in the pending_expansions map are also not nested indexable.
+ if (!is_edge_nested_indexable) {
+ for (auto& pending_expansion : pending_expansion_paths_copy) {
+ pending_expansion.second = false;
+ }
+ }
+
+ // 2. Check if we're in the middle of expanding this type - IOW
+ // there's a cycle!
+ //
+ // This cycle is not allowed if either:
+ // 1. The cycle starting at adjacent_type is nested indexable, OR
+ // 2. The current path contains a joinable property.
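+ //
+ // For example (illustrative): a type that references itself through a
+ // document property with index_nested_properties=true is rejected, while
+ // the same cycle with index_nested_properties=false and no joinable
+ // property on the path is allowed.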
+ auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
+ if (adjacent_itr != pending_expansion_paths_copy.end()) {
+ if (adjacent_itr->second || new_path_contains_joinable_property) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Invalid cycle detected in type configs. '", type,
+ "' references itself and is nested-indexable or nested-joinable."));
+ }
+ // The cycle is allowed and there's no need to keep iterating the loop.
+ // Move on to the next adjacent value.
+ continue;
+ }
+
+ // 3. Expand this type as needed.
+ ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
+ direct_nested_types_map, joinable_types, adjacent_type,
+ new_path_contains_joinable_property, expanded_nested_types_map,
+ std::move(pending_expansion_paths_copy), sink_types));
+ if (sink_types->count(adjacent_type) > 0) {
+ // "adjacent" is a sink node. Just skip to the next.
+ continue;
+ }
+
+ // 4. "adjacent" has been fully expanded. Add all of its transitive
+ // outgoing relations to this type's transitive outgoing relations.
+ auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
+ for (const auto& [transitive_reachable, _] :
+ adjacent_expanded_itr->second) {
+ // Insert a transitively reachable node `transitive_reachable` for
+ // `type` if it wasn't previously reachable.
+ // Since there is no direct edge between `type` and `transitive_reachable`
+ // we insert an empty vector into the dependent map.
+ expanded_relations.insert({transitive_reachable, {}});
+ }
+ }
+ for (const auto& kvp : expanded_relations) {
+ expanded_nested_types_map->operator[](type).insert(kvp);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
+ const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
+ std::string_view type,
+ SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
+ std::unordered_set<std::string_view>* pending_expansions,
+ std::unordered_set<std::string_view>* sink_types) {
+ auto expanded_itr = expanded_relation_map->find(type);
+ if (expanded_itr != expanded_relation_map->end()) {
+ // We've already expanded this type. Just return.
+ return libtextclassifier3::Status::OK;
+ }
+ auto itr = direct_relation_map.find(type);
+ if (itr == direct_relation_map.end()) {
+ // It's a sink node. Just return.
+ sink_types->insert(type);
+ return libtextclassifier3::Status::OK;
+ }
+ pending_expansions->insert(type);
+ std::unordered_map<std::string_view, T> expanded_relations;
+
+ // Add all of the adjacent outgoing relations.
+ expanded_relations.reserve(itr->second.size());
+ expanded_relations.insert(itr->second.begin(), itr->second.end());
+
+ // Iterate through each adjacent outgoing relation and add their indirect
+ // outgoing relations.
+ for (const auto& [adjacent, _] : itr->second) {
+ // 1. Check if we're in the middle of expanding this type - IOW there's a
+ // cycle!
+ if (pending_expansions->count(adjacent) > 0) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
+ "' references or inherits from itself."));
+ }
+
+ // 2. Expand this type as needed.
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_relation_map, adjacent, expanded_relation_map,
+ pending_expansions, sink_types));
+ if (sink_types->count(adjacent) > 0) {
+ // "adjacent" is a sink node. Just skip to the next.
+ continue;
+ }
+
+ // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
+ // relations to this type's transitive outgoing relations.
+ auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
+ for (const auto& [transitive_reachable, _] :
+ adjacent_expanded_itr->second) {
+ // Insert a transitively reachable node `transitive_reachable` for `type`.
+ // Also since there is no direct edge between `type` and
+ // `transitive_reachable`, the direct edge is initialized by default.
+ expanded_relations.insert({transitive_reachable, T()});
+ }
+ }
+ expanded_relation_map->insert({type, std::move(expanded_relations)});
+ pending_expansions->erase(type);
+ return libtextclassifier3::Status::OK;
+}
+
+// Calculate and return the expanded nested-type map from
+// direct_nested_type_map. This expands the direct_nested_type_map to also
+// include indirect nested-type relations.
+//
+// Ex. Suppose we have the following relations in direct_nested_type_map.
+//
+// C -> B (Schema type B has a document property of type C)
+// B -> A (Schema type A has a document property of type B)
+//
+// Then, this function would expand the map by adding C -> A to the map.
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
+CalculateTransitiveNestedTypeRelations(
+ const SchemaUtil::DependentMap& direct_nested_type_map,
+ const std::unordered_set<std::string_view>& joinable_types,
+ bool allow_circular_schema_definitions) {
+ SchemaUtil::DependentMap expanded_nested_type_map;
+ // Types that have no outgoing relations.
+ std::unordered_set<std::string_view> sink_types;
+
+ if (allow_circular_schema_definitions) {
+ // Map of nodes that are pending expansion -> whether the path from each key
+ // node to the 'current' node is nested_indexable.
+ // A copy of this map is made for each new node that we expand.
+ std::unordered_map<std::string_view, bool>
+ pending_expansion_paths_indexable;
+ for (const auto& kvp : direct_nested_type_map) {
+ ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
+ direct_nested_type_map, joinable_types, kvp.first,
+ /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
+ std::unordered_map<std::string_view, bool>(
+ pending_expansion_paths_indexable),
+ &sink_types));
+ }
+ } else {
+ // If allow_circular_schema_definitions is false, then fallback to the old
+ // way of detecting cycles.
+ // Types that we are expanding.
+ std::unordered_set<std::string_view> pending_expansions;
+ for (const auto& kvp : direct_nested_type_map) {
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_nested_type_map, kvp.first, &expanded_nested_type_map,
+ &pending_expansions, &sink_types));
+ }
+ }
+ return expanded_nested_type_map;
+}
+
+// Calculate and return the expanded inheritance map from
+// direct_inheritance_map. This expands the direct_inheritance_map to also
+// include indirect inheritance relations.
+//
+// Ex. Suppose we have the following relations in direct_inheritance_map.
+//
+// C -> B (Schema type C is B's parent_type)
+// B -> A (Schema type B is A's parent_type)
+//
+// Then, this function would expand the map by adding C -> A to the map.
+libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+CalculateTransitiveInheritanceRelations(
+ const SchemaUtil::InheritanceMap& direct_inheritance_map) {
+ SchemaUtil::InheritanceMap expanded_inheritance_map;
+
+ // Types that we are expanding.
+ std::unordered_set<std::string_view> pending_expansions;
+
+ // Types that have no outgoing relation.
+ std::unordered_set<std::string_view> sink_types;
+ for (const auto& kvp : direct_inheritance_map) {
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_inheritance_map, kvp.first, &expanded_inheritance_map,
+ &pending_expansions, &sink_types));
+ }
+ return expanded_inheritance_map;
+}
+
+// Builds a transitive dependent map. Types with no dependents will not be
+// present in the map as keys.
+//
+// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
+// type B and B has a property of type C. C and D only have non-document
+// properties.
+//
+// The transitive dependent map for this schema would be:
+// C -> A, B (both A and B depend on C)
+// B -> A (A depends on B)
+//
+// A and D will not be present in the map as keys because no type depends on
+// them.
+//
+// RETURNS:
+// On success, a transitive dependent map of all types in the schema.
+// INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
+// ALREADY_EXISTS if a schema type is specified more than once in the schema
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
+BuildTransitiveDependentGraph(const SchemaProto& schema,
+ bool allow_circular_schema_definitions) {
+ // We expand the nested-type dependent map and inheritance map differently
+ // when calculating transitive relations. These two kinds of relations
+ // should also not be combined transitively, so we keep them as separate
+ // maps.
+ //
+ // e.g. For schema type A, B and C, B depends on A through inheritance, and
+ // C depends on B by having a property with type B, we will have the two
+ // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
+ // in the map.
+ SchemaUtil::DependentMap direct_nested_type_map;
+ SchemaUtil::InheritanceMap direct_inheritance_map;
+
+ // Set of schema types that have at least one joinable property.
+ std::unordered_set<std::string_view> joinable_types;
+
+ // Add all first-order dependents.
+ std::unordered_set<std::string_view> known_types;
+ std::unordered_set<std::string_view> unknown_types;
+ for (const auto& type_config : schema.types()) {
+ std::string_view schema_type(type_config.schema_type());
+ if (known_types.count(schema_type) > 0) {
+ return absl_ports::AlreadyExistsError(absl_ports::StrCat(
+ "Field 'schema_type' '", schema_type, "' is already defined"));
+ }
+ known_types.insert(schema_type);
+ unknown_types.erase(schema_type);
+ // Insert inheritance relations into the inheritance map.
+ for (std::string_view parent_schema_type : type_config.parent_types()) {
+ if (known_types.count(parent_schema_type) == 0) {
+ unknown_types.insert(parent_schema_type);
+ }
+ direct_inheritance_map[parent_schema_type][schema_type] = true;
+ }
+ for (const auto& property_config : type_config.properties()) {
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ joinable_types.insert(schema_type);
+ }
+ // Insert nested-type relations into the nested-type map.
+ if (property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // Need to know which schema_type these DOCUMENT properties should be
+ // validated against.
+ std::string_view property_schema_type(property_config.schema_type());
+ if (known_types.count(property_schema_type) == 0) {
+ unknown_types.insert(property_schema_type);
+ }
+ direct_nested_type_map[property_schema_type][schema_type].push_back(
+ &property_config);
+ }
+ }
+ }
+ if (!unknown_types.empty()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
+ }
+
+ // Merge the two expanded maps into a single dependent_map, without making
+ // relations transitive across the inheritance and nested-type maps.
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
+ CalculateTransitiveNestedTypeRelations(
+ direct_nested_type_map, joinable_types,
+ allow_circular_schema_definitions));
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::InheritanceMap expanded_inheritance_map,
+ CalculateTransitiveInheritanceRelations(direct_inheritance_map));
+ for (const auto& [parent_type, inheritance_relation] :
+ expanded_inheritance_map) {
+ // Insert the parent_type into the dependent map if it is not present
+ // already.
+ merged_dependent_map.insert({parent_type, {}});
+ for (const auto& [child_type, _] : inheritance_relation) {
+ // Insert the child_type into parent_type's dependent map if it's not
+ // present already, in which case the value will be an empty vector.
+ merged_dependent_map[parent_type].insert({child_type, {}});
+ }
+ }
+ return merged_dependent_map;
+}
+
+libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
+ SchemaUtil::InheritanceMap direct_inheritance_map;
+ for (const auto& type_config : schema.types()) {
+ for (std::string_view parent_schema_type : type_config.parent_types()) {
+ direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
+ true;
+ }
+ }
+ return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
+}
+
+libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
+ const SchemaProto& schema, bool allow_circular_schema_definitions) {
+ // 1. Build the dependent map. This will detect any cycles, non-existent or
+ // duplicate types in the schema.
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::DependentMap dependent_map,
+ BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
// Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
// already.
std::unordered_set<std::string_view> known_property_names;
+ // Tracks schema types that contain at least one joinable property.
+ std::unordered_set<std::string_view> schema_types_with_joinable_property;
+
+ // 2. Validate the properties of each type.
for (const auto& type_config : schema.types()) {
std::string_view schema_type(type_config.schema_type());
ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
- // We can't have duplicate schema_types
- if (!known_schema_types.insert(schema_type).second) {
- return absl_ports::AlreadyExistsError(absl_ports::StrCat(
- "Field 'schema_type' '", schema_type, "' is already defined"));
- }
- unknown_schema_types.erase(schema_type);
-
// We only care about properties being unique within one type_config
known_property_names.clear();
+
for (const auto& property_config : type_config.properties()) {
std::string_view property_name(property_config.property_name());
ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
@@ -146,32 +604,78 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
validated_status,
absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
"data_types in schema property '",
- schema_type, " ", property_name, "'"));
+ schema_type, ".", property_name, "'"));
}
- // Need to make sure we eventually see/validate this schema_type
- if (known_schema_types.count(property_schema_type) == 0) {
- unknown_schema_types.insert(property_schema_type);
- }
+ ICING_RETURN_IF_ERROR(ValidateDocumentIndexingConfig(
+ property_config.document_indexing_config(), schema_type,
+ property_name));
}
ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
schema_type, property_name));
- ICING_RETURN_IF_ERROR(
- ValidateIndexingConfig(property_config.indexing_config(), data_type));
+ if (data_type == PropertyConfigProto::DataType::STRING) {
+ ICING_RETURN_IF_ERROR(ValidateStringIndexingConfig(
+ property_config.string_indexing_config(), data_type, schema_type,
+ property_name));
+ }
+
+ ICING_RETURN_IF_ERROR(ValidateJoinableConfig(
+ property_config.joinable_config(), data_type,
+ property_config.cardinality(), schema_type, property_name));
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ schema_types_with_joinable_property.insert(schema_type);
+ }
}
}
- // An Document property claimed to be of a schema_type that we never
- // saw/validated
- if (!unknown_schema_types.empty()) {
- return absl_ports::UnknownError(
- absl_ports::StrCat("Undefined 'schema_type's: ",
- absl_ports::StrJoin(unknown_schema_types, ",")));
+  // BFS traverse the dependent graph to make sure that no nested document
+  // properties (properties with DOCUMENT data type) have REPEATED cardinality
+  // while depending on schema types with a joinable property.
+ std::queue<std::string_view> frontier;
+ for (const auto& schema_type : schema_types_with_joinable_property) {
+ frontier.push(schema_type);
}
+ std::unordered_set<std::string_view> traversed =
+ std::move(schema_types_with_joinable_property);
+ while (!frontier.empty()) {
+ std::string_view schema_type = frontier.front();
+ frontier.pop();
- return libtextclassifier3::Status::OK;
+ const auto it = dependent_map.find(schema_type);
+ if (it == dependent_map.end()) {
+ continue;
+ }
+
+ // Check every type that has a property of type schema_type.
+ for (const auto& [next_schema_type, property_configs] : it->second) {
+ // Check all properties in "next_schema_type" that are of type
+ // "schema_type".
+ for (const PropertyConfigProto* property_config : property_configs) {
+ if (property_config != nullptr &&
+ property_config->cardinality() ==
+ PropertyConfigProto::Cardinality::REPEATED) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Schema type '", next_schema_type,
+ "' cannot have REPEATED nested document property '",
+ property_config->property_name(),
+ "' while connecting to some joinable properties"));
+ }
+ }
+
+ if (traversed.count(next_schema_type) == 0) {
+ traversed.insert(next_schema_type);
+ frontier.push(next_schema_type);
+ }
+ }
+ }
+
+  // Verify that every child type's property set includes all compatible
+  // properties from its parent types.
+ ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
+ return dependent_map;
}
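+
+// Editorial sketch (illustrative, not part of this change): the BFS above
+// rejects, for example, a schema in which "Person" declares a joinable
+// property and another type holds "Person" documents with REPEATED
+// cardinality:
+//
+//   PropertyConfigProto prop;
+//   prop.set_property_name("sender");
+//   prop.set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+//   prop.set_schema_type("Person");
+//   prop.set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+//
+// Validate() would return INVALID_ARGUMENT for such a schema.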
libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
@@ -214,7 +718,7 @@ libtextclassifier3::Status SchemaUtil::ValidateDataType(
if (data_type == PropertyConfigProto::DataType::UNKNOWN) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Field 'data_type' cannot be UNKNOWN for schema property '",
- schema_type, " ", property_name, "'"));
+ schema_type, ".", property_name, "'"));
}
return libtextclassifier3::Status::OK;
@@ -228,22 +732,196 @@ libtextclassifier3::Status SchemaUtil::ValidateCardinality(
if (cardinality == PropertyConfigProto::Cardinality::UNKNOWN) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Field 'cardinality' cannot be UNKNOWN for schema property '",
- schema_type, " ", property_name, "'"));
+ schema_type, ".", property_name, "'"));
}
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status SchemaUtil::ValidateIndexingConfig(
- const IndexingConfig& config,
- PropertyConfigProto::DataType::Code data_type) {
- if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
- return libtextclassifier3::Status::OK;
+libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
+ const StringIndexingConfig& config,
+ PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
+ std::string_view property_name) {
+ if (config.term_match_type() == TermMatchType::UNKNOWN &&
+ config.tokenizer_type() != StringIndexingConfig::TokenizerType::NONE) {
+ // They set a tokenizer type, but no term match type.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Indexed string property '", schema_type, ".", property_name,
+ "' cannot have a term match type UNKNOWN"));
}
+
if (config.term_match_type() != TermMatchType::UNKNOWN &&
- config.tokenizer_type() == IndexingConfig::TokenizerType::NONE) {
+ config.tokenizer_type() == StringIndexingConfig::TokenizerType::NONE) {
+    // They set a term match type, but no tokenizer type.
return absl_ports::InvalidArgumentError(
- "TermMatchType properties cannot have a tokenizer type of NONE");
+ absl_ports::StrCat("Indexed string property '", property_name,
+ "' cannot have a tokenizer type of NONE"));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
+ const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ std::string_view schema_type, std::string_view property_name) {
+ if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
+ if (data_type != PropertyConfigProto::DataType::STRING) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Qualified id joinable property '", property_name,
+ "' is required to have STRING data type"));
+ }
+
+ if (cardinality == PropertyConfigProto::Cardinality::REPEATED) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Qualified id joinable property '", property_name,
+ "' cannot have REPEATED cardinality"));
+ }
+ }
+
+ if (config.propagate_delete() &&
+ config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Field 'property_name' '", property_name,
+ "' is required to have QUALIFIED_ID joinable "
+ "value type with delete propagation enabled"));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
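+
+// Editorial sketch (illustrative, not part of this change): a joinable
+// property that passes all of the checks above would look like:
+//
+//   PropertyConfigProto prop;
+//   prop.set_property_name("qualifiedId");
+//   prop.set_data_type(PropertyConfigProto::DataType::STRING);
+//   prop.set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+//   prop.mutable_joinable_config()->set_value_type(
+//       JoinableConfig::ValueType::QUALIFIED_ID);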
+
+libtextclassifier3::Status SchemaUtil::ValidateDocumentIndexingConfig(
+ const DocumentIndexingConfig& config, std::string_view schema_type,
+ std::string_view property_name) {
+ if (!config.indexable_nested_properties_list().empty() &&
+ config.index_nested_properties()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "DocumentIndexingConfig.index_nested_properties is required to be "
+ "false when providing a non-empty indexable_nested_properties_list "
+ "for property '",
+ schema_type, ".", property_name, "'"));
+ }
+ return libtextclassifier3::Status::OK;
+}
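+
+// Editorial sketch (illustrative, not part of this change): a valid
+// configuration selects nested properties explicitly, e.g.
+//
+//   DocumentIndexingConfig config;
+//   config.set_index_nested_properties(false);
+//   config.add_indexable_nested_properties_list("name");
+//
+// whereas index_nested_properties=true combined with a non-empty list is
+// rejected above.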
+
+/* static */ bool SchemaUtil::IsIndexedProperty(
+ const PropertyConfigProto& property_config) {
+ switch (property_config.data_type()) {
+ case PropertyConfigProto::DataType::STRING:
+ return property_config.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN &&
+ property_config.string_indexing_config().tokenizer_type() !=
+ StringIndexingConfig::TokenizerType::NONE;
+ case PropertyConfigProto::DataType::INT64:
+ return property_config.integer_indexing_config().numeric_match_type() !=
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN;
+ case PropertyConfigProto::DataType::DOCUMENT:
+ // A document property is considered indexed if it has
+ // index_nested_properties=true, or a non-empty
+ // indexable_nested_properties_list.
+ return property_config.document_indexing_config()
+ .index_nested_properties() ||
+ !property_config.document_indexing_config()
+ .indexable_nested_properties_list()
+ .empty();
+ case PropertyConfigProto::DataType::UNKNOWN:
+ case PropertyConfigProto::DataType::DOUBLE:
+ case PropertyConfigProto::DataType::BOOLEAN:
+ case PropertyConfigProto::DataType::BYTES:
+ return false;
+ }
+}
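+
+// Editorial sketch (illustrative, not part of this change): per the switch
+// above, a STRING property counts as indexed only when both a term match type
+// and a tokenizer type are set:
+//
+//   PropertyConfigProto prop;
+//   prop.set_data_type(PropertyConfigProto::DataType::STRING);
+//   prop.mutable_string_indexing_config()->set_term_match_type(
+//       TermMatchType::EXACT_ONLY);
+//   prop.mutable_string_indexing_config()->set_tokenizer_type(
+//       StringIndexingConfig::TokenizerType::PLAIN);
+//   bool indexed = SchemaUtil::IsIndexedProperty(prop);  // true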
+
+bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
+ std::string_view parent_type,
+ std::string_view child_type) {
+ auto iter = inheritance_map.find(parent_type);
+ if (iter == inheritance_map.end()) {
+ return false;
+ }
+ return iter->second.count(child_type) > 0;
+}
+
+bool SchemaUtil::IsInheritedPropertyCompatible(
+ const SchemaUtil::InheritanceMap& inheritance_map,
+ const PropertyConfigProto& child_property_config,
+ const PropertyConfigProto& parent_property_config) {
+ // Check if child_property_config->cardinality() <=
+ // parent_property_config->cardinality().
+ // Subtype may require a stricter cardinality, but cannot loosen cardinality
+ // requirements.
+ if (!CardinalityLessThanEq(child_property_config.cardinality(),
+ parent_property_config.cardinality())) {
+ return false;
+ }
+
+  // Now we can assume the cardinality check passes.
+ if (child_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT ||
+ parent_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ return child_property_config.data_type() ==
+ parent_property_config.data_type();
+ }
+
+  // Now we can assume both properties have DOCUMENT data type.
+ return child_property_config.schema_type() ==
+ parent_property_config.schema_type() ||
+ IsParent(inheritance_map, parent_property_config.schema_type(),
+ child_property_config.schema_type());
+}
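+
+// Editorial example (illustrative, not part of this change): given a
+// hypothetical schema where "Artist" extends "Person", a child property
+// (REQUIRED, DOCUMENT, "Artist") is compatible with a parent property
+// (OPTIONAL, DOCUMENT, "Person"): REQUIRED <: OPTIONAL satisfies the
+// cardinality check, and IsParent(inheritance_map, "Person", "Artist")
+// satisfies the schema_type check. Swapping child and parent would fail both.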
+
+libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
+ const SchemaProto& schema) {
+  // Create an inheritance map.
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
+ BuildTransitiveInheritanceGraph(schema));
+
+  // Create a map that maps each type name to a map from property name to
+  // PropertyConfigProto.
+ std::unordered_map<
+ std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
+ property_map;
+ for (const SchemaTypeConfigProto& type_config : schema.types()) {
+    // Skip building entries for types without any child or parent, since
+    // such entries will never be used.
+ if (type_config.parent_types().empty() &&
+ inheritance_map.count(type_config.schema_type()) == 0) {
+ continue;
+ }
+ auto& curr_property_map = property_map[type_config.schema_type()];
+ for (const PropertyConfigProto& property_config :
+ type_config.properties()) {
+ curr_property_map[property_config.property_name()] = &property_config;
+ }
+ }
+
+ // Validate child properties.
+ for (const SchemaTypeConfigProto& type_config : schema.types()) {
+ const std::string& child_type_name = type_config.schema_type();
+ auto& child_property_map = property_map[child_type_name];
+
+ for (const std::string& parent_type_name : type_config.parent_types()) {
+ auto& parent_property_map = property_map[parent_type_name];
+
+ for (const auto& [property_name, parent_property_config] :
+ parent_property_map) {
+ auto child_property_iter = child_property_map.find(property_name);
+ if (child_property_iter == child_property_map.end()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Property ", property_name, " is not present in child type ",
+ child_type_name, ", but it is defined in the parent type ",
+ parent_type_name, "."));
+ }
+ if (!IsInheritedPropertyCompatible(inheritance_map,
+ *child_property_iter->second,
+ *parent_property_config)) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Property ", property_name, " from child type ", child_type_name,
+ " is not compatible to the parent type ", parent_type_name, "."));
+ }
+ }
+ }
}
return libtextclassifier3::Status::OK;
}
@@ -260,21 +938,35 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
const SchemaTypeConfigProto& type_config) {
ParsedPropertyConfigs parsed_property_configs;
- // TODO(samzheng): consider caching property_config_map for some properties,
+ // TODO(cassiewang): consider caching property_config_map for some properties,
// e.g. using LRU cache. Or changing schema.proto to use go/protomap.
for (const PropertyConfigProto& property_config : type_config.properties()) {
- parsed_property_configs.property_config_map.emplace(
- property_config.property_name(), &property_config);
+ std::string_view property_name = property_config.property_name();
+ parsed_property_configs.property_config_map.emplace(property_name,
+ &property_config);
if (property_config.cardinality() ==
PropertyConfigProto::Cardinality::REQUIRED) {
- parsed_property_configs.num_required_properties++;
+ parsed_property_configs.required_properties.insert(property_name);
}
// A non-default term_match_type indicates that this property is meant to be
// indexed.
- if (property_config.indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- parsed_property_configs.num_indexed_properties++;
+ if (IsIndexedProperty(property_config)) {
+ parsed_property_configs.indexed_properties.insert(property_name);
+ }
+
+ // A non-default value_type indicates that this property is meant to be
+ // joinable.
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ parsed_property_configs.joinable_properties.insert(property_name);
+ }
+
+    // Also keep track of which nested document properties there are. Adding
+    // new nested document properties will result in a join-index rebuild.
+ if (property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT) {
+ parsed_property_configs.nested_document_properties.insert(property_name);
}
}
@@ -282,11 +974,12 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
}
const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
- const SchemaProto& old_schema, const SchemaProto& new_schema) {
+ const SchemaProto& old_schema, const SchemaProto& new_schema,
+ const DependentMap& new_schema_dependent_map) {
SchemaDelta schema_delta;
- schema_delta.index_incompatible = false;
- TypeConfigMap new_type_config_map;
+ TypeConfigMap old_type_config_map, new_type_config_map;
+ BuildTypeConfigMap(old_schema, &old_type_config_map);
BuildTypeConfigMap(new_schema, &new_type_config_map);
// Iterate through and check each field of the old schema
@@ -297,9 +990,9 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
if (new_schema_type_and_config == new_type_config_map.end()) {
// Didn't find the old schema type in the new schema, all the old
// documents of this schema type are invalid without the schema
- ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type ",
+ ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type '",
old_type_config.schema_type(),
- " was not defined in new schema");
+ "' was not defined in new schema");
schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
continue;
}
@@ -310,9 +1003,48 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// We only need to check the old, existing properties to see if they're
// compatible since we'll have old data that may be invalidated or need to
// be reindexed.
- int32_t old_required_properties = 0;
- int32_t old_indexed_properties = 0;
+ std::unordered_set<std::string_view> old_required_properties;
+ std::unordered_set<std::string_view> old_indexed_properties;
+ std::unordered_set<std::string_view> old_joinable_properties;
+ std::unordered_set<std::string_view> old_nested_document_properties;
+
+ // If there is a different number of properties, then there must have been a
+ // change.
+ bool has_property_changed =
+ old_type_config.properties_size() !=
+ new_schema_type_and_config->second.properties_size();
+ bool is_incompatible = false;
+ bool is_index_incompatible = false;
+ bool is_join_incompatible = false;
for (const auto& old_property_config : old_type_config.properties()) {
+ std::string_view property_name = old_property_config.property_name();
+ if (old_property_config.cardinality() ==
+ PropertyConfigProto::Cardinality::REQUIRED) {
+ old_required_properties.insert(property_name);
+ }
+
+      // A non-default indexing config indicates that this property is meant
+      // to be indexed.
+ bool is_indexed_property = IsIndexedProperty(old_property_config);
+ if (is_indexed_property) {
+ old_indexed_properties.insert(property_name);
+ }
+
+ bool is_joinable_property =
+ old_property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE;
+ if (is_joinable_property) {
+ old_joinable_properties.insert(property_name);
+ }
+
+ // A nested-document property is a property of DataType::DOCUMENT.
+ bool is_nested_document_property =
+ old_property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT;
+ if (is_nested_document_property) {
+ old_nested_document_properties.insert(property_name);
+ }
+
auto new_property_name_and_config =
new_parsed_property_configs.property_config_map.find(
old_property_config.property_name());
@@ -320,42 +1052,48 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
if (new_property_name_and_config ==
new_parsed_property_configs.property_config_map.end()) {
// Didn't find the old property
- ICING_VLOG(1) << absl_ports::StrCat("Previously defined property type ",
- old_type_config.schema_type(), ".",
- old_property_config.property_name(),
- " was not defined in new schema");
- schema_delta.schema_types_incompatible.insert(
- old_type_config.schema_type());
+ ICING_VLOG(1) << absl_ports::StrCat(
+ "Previously defined property type '", old_type_config.schema_type(),
+ ".", old_property_config.property_name(),
+ "' was not defined in new schema");
+ is_incompatible = true;
+ is_index_incompatible |= is_indexed_property;
+ is_join_incompatible |=
+ is_joinable_property || is_nested_document_property;
continue;
}
const PropertyConfigProto* new_property_config =
new_property_name_and_config->second;
+ if (!has_property_changed &&
+ !ArePropertiesEqual(old_property_config, *new_property_config)) {
+ // Finally found a property that changed.
+ has_property_changed = true;
+ }
if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
ICING_VLOG(1) << absl_ports::StrCat(
- "Property ", old_type_config.schema_type(), ".",
- old_property_config.property_name(), " is incompatible.");
- schema_delta.schema_types_incompatible.insert(
- old_type_config.schema_type());
+ "Property '", old_type_config.schema_type(), ".",
+ old_property_config.property_name(), "' is incompatible.");
+ is_incompatible = true;
}
- if (old_property_config.cardinality() ==
- PropertyConfigProto::Cardinality::REQUIRED) {
- ++old_required_properties;
- }
-
- // A non-default term_match_type indicates that this property is meant to
- // be indexed.
- if (old_property_config.indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- ++old_indexed_properties;
+      // Any change in an indexed property's config requires reindexing.
+ if (!IsTermMatchTypeCompatible(
+ old_property_config.string_indexing_config(),
+ new_property_config->string_indexing_config()) ||
+ !IsIntegerNumericMatchTypeCompatible(
+ old_property_config.integer_indexing_config(),
+ new_property_config->integer_indexing_config()) ||
+ !IsDocumentIndexingCompatible(
+ old_property_config.document_indexing_config(),
+ new_property_config->document_indexing_config())) {
+ is_index_incompatible = true;
}
- // Any change in the indexed property requires a reindexing
- if (!IsTermMatchTypeCompatible(old_property_config.indexing_config(),
- new_property_config->indexing_config())) {
- schema_delta.index_incompatible = true;
+ if (old_property_config.joinable_config().value_type() !=
+ new_property_config->joinable_config().value_type()) {
+ is_join_incompatible = true;
}
}
@@ -364,27 +1102,79 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// guaranteed from our previous checks that all the old properties are also
// present in the new property config, so we can do a simple int comparison
// here to detect new required properties.
- if (new_parsed_property_configs.num_required_properties >
- old_required_properties) {
+ if (!IsSubset(new_parsed_property_configs.required_properties,
+ old_required_properties)) {
ICING_VLOG(1) << absl_ports::StrCat(
- "New schema ", old_type_config.schema_type(),
- " has REQUIRED properties that are not "
+ "New schema '", old_type_config.schema_type(),
+ "' has REQUIRED properties that are not "
"present in the previously defined schema");
- schema_delta.schema_types_incompatible.insert(
- old_type_config.schema_type());
+ is_incompatible = true;
}
- // If we've gained any new indexed properties, then the section ids may
- // change. Since the section ids are stored in the index, we'll need to
+ // If we've gained any new indexed properties (this includes gaining new
+ // indexed nested document properties), then the section ids may change.
+ // Since the section ids are stored in the index, we'll need to
// reindex everything.
- if (new_parsed_property_configs.num_indexed_properties >
- old_indexed_properties) {
- ICING_VLOG(1) << absl_ports::StrCat(
- "Set of indexed properties in schema type '",
- old_type_config.schema_type(),
- "' has changed, required reindexing.");
- schema_delta.index_incompatible = true;
+ if (!IsSubset(new_parsed_property_configs.indexed_properties,
+ old_indexed_properties)) {
+ ICING_VLOG(1) << "Set of indexed properties in schema type '"
+ << old_type_config.schema_type()
+ << "' has changed, required reindexing.";
+ is_index_incompatible = true;
+ }
+
+ // If we've gained any new joinable properties, then the joinable property
+ // ids may change. Since the joinable property ids are stored in the cache,
+ // we'll need to reconstruct join index.
+ // If we've gained any new nested document properties, we also rebuild the
+ // join index. This is because we index all nested joinable properties, so
+    // adding a nested document property will likely result in more joinable
+    // properties.
+ if (!IsSubset(new_parsed_property_configs.joinable_properties,
+ old_joinable_properties) ||
+ !IsSubset(new_parsed_property_configs.nested_document_properties,
+ old_nested_document_properties)) {
+ ICING_VLOG(1) << "Set of joinable properties in schema type '"
+ << old_type_config.schema_type()
+ << "' has changed, required reconstructing joinable cache.";
+ is_join_incompatible = true;
+ }
+
+ if (is_incompatible) {
+ AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
+ old_type_config, new_schema_dependent_map,
+ old_type_config_map, new_type_config_map);
+ }
+
+ if (is_index_incompatible) {
+ AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
+ old_type_config, new_schema_dependent_map,
+ old_type_config_map, new_type_config_map);
}
+
+ if (is_join_incompatible) {
+ AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
+ old_type_config, new_schema_dependent_map,
+ old_type_config_map, new_type_config_map);
+ }
+
+ if (!is_incompatible && !is_index_incompatible && !is_join_incompatible &&
+ has_property_changed) {
+ schema_delta.schema_types_changed_fully_compatible.insert(
+ old_type_config.schema_type());
+ }
+
+    // Lastly, remove this type from the map. We know that this type can't
+    // come up in future iterations through the old schema types because
+    // schema types are unique within the old schema.
+ new_type_config_map.erase(old_type_config.schema_type());
+ }
+
+ // Any types that are still present in the new_type_config_map are newly added
+ // types.
+ schema_delta.schema_types_new.reserve(new_type_config_map.size());
+ for (auto& kvp : new_type_config_map) {
+ schema_delta.schema_types_new.insert(std::move(kvp.first));
}
return schema_delta;
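+
+// Editorial sketch (illustrative, not part of this change): a typical call
+// sequence for the two entry points above, assuming a new schema is being
+// applied over an existing one:
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       SchemaUtil::DependentMap dependent_map,
+//       SchemaUtil::Validate(new_schema,
+//                            /*allow_circular_schema_definitions=*/true));
+//   SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+//       old_schema, new_schema, dependent_map);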
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index d65dd10..4f09915 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -22,6 +22,7 @@
#include <unordered_set>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/schema.pb.h"
namespace icing {
@@ -32,13 +33,41 @@ class SchemaUtil {
using TypeConfigMap =
std::unordered_map<std::string, const SchemaTypeConfigProto>;
- struct SchemaDelta {
- // Whether an indexing config has changed, requiring the index to be
- // regenerated. We don't list out all the types that make the index
- // incompatible because our index isn't optimized for that. It's much easier
- // to reset the entire index and reindex every document.
- bool index_incompatible = false;
+  // A data structure that stores the relationships between schema types. The
+  // keys in TypeRelationMap are schema types, and each value maps the schema
+  // types that are directly or indirectly related to the key to a relation
+  // value of type T.
+ template <typename T>
+ using TypeRelationMap =
+ std::unordered_map<std::string_view,
+ std::unordered_map<std::string_view, T>>;
+
+ // If A -> B is indicated in the map, then type A must be built before
+ // building type B, which implies one of the following situations.
+ //
+ // 1. B has a property of type A.
+ // 2. A is a parent type of B via polymorphism.
+ //
+  // For the first case, this map will also include all PropertyConfigProto
+  // (with DOCUMENT data_type) pointers that *directly* connect types A and B.
+  // In other words, this vector of PropertyConfigProto* holds the "direct
+  // edges" connecting A and B. The vector will be empty if A and B are not
+  // "directly" connected but are instead connected via an intermediate schema
+  // type. For example, if the actual dependency is A -> C -> B, then the map
+  // contains A -> C and C -> B, each with valid PropertyConfigProto*, but we
+  // also expand transitive dependents: A -> B is added to the dependent map
+  // with an empty vector of "edges".
+ using DependentMap = TypeRelationMap<std::vector<const PropertyConfigProto*>>;
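+
+  // Editorial sketch (illustrative, not part of this change): the DependentMap
+  // for the A -> C -> B example above would contain:
+  //
+  //   A -> { C: [&a_to_c_prop], B: [] }
+  //   C -> { B: [&c_to_b_prop] }
+  //
+  // where a_to_c_prop and c_to_b_prop are the hypothetical DOCUMENT properties
+  // connecting the types.
+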
+ // If A -> B is indicated in the map, then type A is a parent type of B,
+ // directly or indirectly. If directly, the bool value in the map will be
+ // true, otherwise false.
+ //
+ // Note that all relationships contained in this map are also entries in the
+ // DependentMap, i.e. if B inherits from A, then there will be a mapping from
+ // A to B in both this map and the DependentMap.
+ using InheritanceMap = TypeRelationMap<bool>;
+
+ struct SchemaDelta {
// Which schema types were present in the old schema, but were deleted from
// the new schema.
std::unordered_set<std::string> schema_types_deleted;
@@ -47,10 +76,35 @@ class SchemaUtil {
// could invalidate existing Documents of that schema type.
std::unordered_set<std::string> schema_types_incompatible;
+ // Schema types that were added in the new schema. Represented by the
+ // `schema_type` field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_new;
+
+ // Schema types that were changed in a way that was backwards compatible and
+ // didn't invalidate the index. Represented by the `schema_type` field in
+ // the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_changed_fully_compatible;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the index. Represented by the `schema_type` field in the
+ // SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_index_incompatible;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the joinable cache. Represented by the `schema_type`
+ // field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_join_incompatible;
+
bool operator==(const SchemaDelta& other) const {
- return index_incompatible == other.index_incompatible &&
- schema_types_deleted == other.schema_types_deleted &&
- schema_types_incompatible == other.schema_types_incompatible;
+ return schema_types_deleted == other.schema_types_deleted &&
+ schema_types_incompatible == other.schema_types_incompatible &&
+ schema_types_new == other.schema_types_new &&
+ schema_types_changed_fully_compatible ==
+ other.schema_types_changed_fully_compatible &&
+ schema_types_index_incompatible ==
+ other.schema_types_index_incompatible &&
+ schema_types_join_incompatible ==
+ other.schema_types_join_incompatible;
}
};
@@ -59,11 +113,17 @@ class SchemaUtil {
std::unordered_map<std::string_view, const PropertyConfigProto*>
property_config_map;
- // Total number of properties that have an indexing config
- int32_t num_indexed_properties = 0;
+ // Properties that have an indexing config
+ std::unordered_set<std::string_view> indexed_properties;
+
+ // Properties that were REQUIRED
+ std::unordered_set<std::string_view> required_properties;
- // Total number of properties that were REQUIRED
- int32_t num_required_properties = 0;
+ // Properties that have joinable config
+ std::unordered_set<std::string_view> joinable_properties;
+
+ // Properties that have DataType::DOCUMENT
+ std::unordered_set<std::string_view> nested_document_properties;
};
// This function validates:
@@ -80,12 +140,58 @@ class SchemaUtil {
// 9. PropertyConfigProtos.schema_type's must correspond to a
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
+ // 11. Any STRING data types have a valid string_indexing_config
+ // 12. PropertyConfigProtos.joinable_config must be valid. See
+ // ValidateJoinableConfig for more details.
+ // 13. Any PropertyConfigProtos with nested DOCUMENT data type must not have
+  //     REPEATED cardinality if they reference a schema type containing a
+  //     joinable property.
+ // 14. The schema definition cannot have invalid cycles. A cycle is invalid
+ // if:
+ // a. SchemaTypeConfigProto.parent_type definitions form an inheritance
+ // cycle.
+ // b. The schema's property definitions have schema_types that form a
+ // cycle, and all properties on the cycle declare
+ // DocumentIndexingConfig.index_nested_properties=true.
+ // c. The schema's property definitions have schema_types that form a
+ // cycle, and the cycle leads to an invalid joinable property config.
+ // This is the case if:
+  //             i. Any type node in the cycle itself has a joinable property
+ // (property whose joinable config is not NONE), OR
+ // ii. Any type node in the cycle has a nested-type (direct or
+ // indirect) with a joinable property.
+ // 15. For DOCUMENT data types, if
+ // DocumentIndexingConfig.indexable_nested_properties_list is non-empty,
+ // DocumentIndexingConfig.index_nested_properties must be false.
//
// Returns:
+  //   On success, a dependent map from each type to the types that depend on
+  //   it directly or indirectly.
// ALREADY_EXISTS for case 1 and 2
- // INVALID_ARGUMENT for 3-10
- // OK otherwise
- static libtextclassifier3::Status Validate(const SchemaProto& schema);
+ // INVALID_ARGUMENT for 3-15
+ static libtextclassifier3::StatusOr<DependentMap> Validate(
+ const SchemaProto& schema, bool allow_circular_schema_definitions);
+
+ // Builds a transitive inheritance map.
+ //
+ // Ex. Suppose we have a schema with four types A, B, C and D, and we have the
+ // following direct inheritance relation.
+ //
+ // A -> B (A is the parent type of B)
+ // B -> C (B is the parent type of C)
+ // C -> D (C is the parent type of D)
+ //
+ // Then, the transitive inheritance map for this schema would be:
+ //
+ // A -> B, C, D
+ // B -> C, D
+ // C -> D
+ //
+ // RETURNS:
+ // On success, a transitive inheritance map of all types in the schema.
+ // INVALID_ARGUMENT if the inheritance graph contains a cycle.
+ static libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+ BuildTransitiveInheritanceGraph(const SchemaProto& schema);
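+
+  // Editorial sketch (illustrative, not part of this change): for the
+  // A/B/C/D example above, callers could check relations like so:
+  //
+  //   ICING_ASSIGN_OR_RETURN(
+  //       SchemaUtil::InheritanceMap inheritance_map,
+  //       SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+  //   inheritance_map["A"]["B"];  // true:  A is a direct parent of B
+  //   inheritance_map["A"]["D"];  // false: A is only an indirect parent of D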
// Creates a mapping of schema type -> schema type config proto. The
// type_config_map is cleared, and then each schema-type_config_proto pair is
@@ -107,6 +213,8 @@ class SchemaUtil {
// `SchemaDelta.schema_types_deleted`
// 3. A schema type's new definition would mean any existing data of the old
// definition is now incompatible.
+  // 4. The derived join index would be incompatible. This is held in
+  //    `SchemaDelta.schema_types_join_incompatible`.
//
// For case 1, the two schemas would result in an incompatible index if:
// 1.1. The new SchemaProto has a different set of indexed properties than
@@ -129,30 +237,146 @@ class SchemaUtil {
// scale defined as:
// LEAST <REPEATED - OPTIONAL - REQUIRED> MOST
//
+ // For case 4, the two schemas would result in an incompatible join if:
+  //   4.1. A SchemaTypeConfig exists in the new SchemaProto that has a
+ // different set of joinable properties than it did in the old
+ // SchemaProto.
+ //
// A property is defined by the combination of the
// SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name.
//
// Returns a SchemaDelta that captures the aforementioned differences.
static const SchemaDelta ComputeCompatibilityDelta(
- const SchemaProto& old_schema, const SchemaProto& new_schema);
+ const SchemaProto& old_schema, const SchemaProto& new_schema,
+ const DependentMap& new_schema_dependent_map);
+
+ // Validates the 'property_name' field.
+ // 1. Can't be an empty string
+ // 2. Can only contain alphanumeric characters
+ //
+ // NOTE: schema_type is only used for logging. It is not necessary to populate
+ // it.
+ //
+ // RETURNS:
+ // - OK if property_name is valid
+  //   - INVALID_ARGUMENT if property name is empty or contains a
+  //     non-alphanumeric character.
+ static libtextclassifier3::Status ValidatePropertyName(
+ std::string_view property_name, std::string_view schema_type = "");
+
+ static bool IsIndexedProperty(const PropertyConfigProto& property_config);
private:
+ // Validates the 'schema_type' field
+ //
+ // Returns:
+ // INVALID_ARGUMENT if 'schema_type' is an empty string.
+ // OK on success
static libtextclassifier3::Status ValidateSchemaType(
std::string_view schema_type);
- static libtextclassifier3::Status ValidatePropertyName(
- std::string_view property_name, std::string_view schema_type);
+
+ // Validates the 'data_type' field.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if it's UNKNOWN
+ // OK on success
static libtextclassifier3::Status ValidateDataType(
PropertyConfigProto::DataType::Code data_type,
std::string_view schema_type, std::string_view property_name);
- static libtextclassifier3::Status ValidatePropertySchemaType(
- std::string_view property_schema_type, std::string_view schema_type,
- std::string_view property_name);
+
+ // Validates the 'cardinality' field.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if it's UNKNOWN
+ // OK on success
static libtextclassifier3::Status ValidateCardinality(
PropertyConfigProto::Cardinality::Code cardinality,
std::string_view schema_type, std::string_view property_name);
- static libtextclassifier3::Status ValidateIndexingConfig(
- const IndexingConfig& config,
- PropertyConfigProto::DataType::Code data_type);
+
+ // Checks that the 'string_indexing_config' satisfies the following rules:
+ // 1. Only STRING data types can be indexed
+ // 2. An indexed property must have a valid tokenizer type
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
+ static libtextclassifier3::Status ValidateStringIndexingConfig(
+ const StringIndexingConfig& config,
+ PropertyConfigProto::DataType::Code data_type,
+ std::string_view schema_type, std::string_view property_name);
+
+ // Checks that the 'joinable_config' satisfies the following rules:
+  //   1. The data type matches the joinable value type:
+  //     a. Only STRING data types can use the QUALIFIED_ID joinable value type
+ // 2. Only QUALIFIED_ID joinable value type can have delete propagation
+ // enabled
+ // 3. Any joinable property should have non-REPEATED cardinality
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
+ static libtextclassifier3::Status ValidateJoinableConfig(
+ const JoinableConfig& config,
+ PropertyConfigProto::DataType::Code data_type,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ std::string_view schema_type, std::string_view property_name);
+
+ // Checks that the 'document_indexing_config' satisfies the following rule:
+ // 1. If indexable_nested_properties is non-empty, index_nested_properties
+ // must be set to false.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
+ static libtextclassifier3::Status ValidateDocumentIndexingConfig(
+ const DocumentIndexingConfig& config, std::string_view schema_type,
+ std::string_view property_name);
+
+  // Returns whether 'parent_type' is a direct or indirect parent of
+  // 'child_type'.
+ static bool IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
+ std::string_view parent_type,
+ std::string_view child_type);
+
+  // Returns whether 'child_property_config' in a child type can override
+ // 'parent_property_config' in the parent type.
+ //
+  // Assign 'child_property_config' a type T1 and 'parent_property_config' a
+  // type T2, each capturing the property's data_type, schema_type and
+  // cardinality. Then 'child_property_config' can override
+  // 'parent_property_config' if and only if T1 <: T2, i.e. T1 is a subtype of
+  // T2.
+ //
+ // Below are the rules for inferring subtype relations.
+ // - T <: T for every type T.
+ // - If U extends T, then U <: T.
+ // - For every type T1, T2 and T3, if T1 <: T2 and T2 <: T3, then T1 <: T3.
+ // - Optional<T> <: Repeated<T> for every type T.
+ // - Required<T> <: Optional<T> for every type T.
+ // - If T1 <: T2, then
+ // - Required<T1> <: Required<T2>
+ // - Optional<T1> <: Optional<T2>
+ // - Repeated<T1> <: Repeated<T2>
+ //
+  // We adopt the Closed World Assumption (CWA): if T1 <: T2 cannot be
+  // deduced from the above rules, then T1 is not a subtype of T2.
+ static bool IsInheritedPropertyCompatible(
+ const SchemaUtil::InheritanceMap& inheritance_map,
+ const PropertyConfigProto& child_property_config,
+ const PropertyConfigProto& parent_property_config);
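+
+  // Editorial example (illustrative, not part of this change): if Artist
+  // extends Person, then Required<Artist> <: Optional<Person> follows from
+  // Required<Artist> <: Optional<Artist> (Required<T> <: Optional<T>),
+  // Optional<Artist> <: Optional<Person> (covariance, since Artist <: Person),
+  // and transitivity.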
+
+  // Verifies that every child type's property set includes all compatible
+  // properties from its parent types, based on the following rule:
+ //
+ // - If a property "prop" of type T is in the parent, then the child type must
+ // also have "prop" that is of type U, such that U <: T, i.e. U is a subtype
+ // of T.
+ //
+ // RETURNS:
+  //   OK on validation success
+  //   INVALID_ARGUMENT if a property that violates the above validation rule
+  //     is found.
+ static libtextclassifier3::Status ValidateInheritedProperties(
+ const SchemaProto& schema);
};
} // namespace lib
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index a3ab96f..82683ba 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -14,606 +14,5286 @@
#include "icing/schema/schema-util.h"
-#include <cstdint>
+#include <initializer_list>
#include <string>
#include <string_view>
+#include <unordered_set>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
namespace icing {
namespace lib {
namespace {
+using portable_equals_proto::EqualsProto;
using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Pair;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
// Properties/fields in a schema type
constexpr char kEmailType[] = "EmailMessage";
+constexpr char kMessageType[] = "Text";
constexpr char kPersonType[] = "Person";
-class SchemaUtilTest : public ::testing::Test {
- protected:
- SchemaProto schema_proto_;
-
- static SchemaTypeConfigProto CreateSchemaTypeConfig(
- const std::string_view schema_type,
- const std::string_view nested_schema_type = "") {
- SchemaTypeConfigProto type;
- type.set_schema_type(std::string(schema_type));
-
- auto string_property = type.add_properties();
- string_property->set_property_name("string");
- string_property->set_data_type(PropertyConfigProto::DataType::STRING);
- string_property->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
-
- auto int_property = type.add_properties();
- int_property->set_property_name("int");
- int_property->set_data_type(PropertyConfigProto::DataType::INT64);
- int_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- auto double_property = type.add_properties();
- double_property->set_property_name("double");
- double_property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- double_property->set_cardinality(
- PropertyConfigProto::Cardinality::REPEATED);
-
- auto bool_property = type.add_properties();
- bool_property->set_property_name("boolean");
- bool_property->set_data_type(PropertyConfigProto::DataType::BOOLEAN);
- bool_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- auto bytes_property = type.add_properties();
- bytes_property->set_property_name("bytes");
- bytes_property->set_data_type(PropertyConfigProto::DataType::BYTES);
- bytes_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- if (!nested_schema_type.empty()) {
- auto document_property = type.add_properties();
- document_property->set_property_name("document");
- document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- document_property->set_cardinality(
- PropertyConfigProto::Cardinality::REPEATED);
- document_property->set_schema_type(std::string(nested_schema_type));
- }
-
- return type;
+class SchemaUtilTest : public ::testing::TestWithParam<bool> {};
+
+TEST_P(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
+ // Create a schema with the following dependent relation:
+ // C
+ // / \
+ // A - B E - F
+ // \ /
+ // D
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ // Provide these in alphabetical order: A, B, C, D, E, F
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, testing::SizeIs(5));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
+}
+
+TEST_P(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
+ // Create a schema with the following dependent relation:
+ // C
+ // / \
+ // A - B E - F
+ // \ /
+ // D
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ // Provide these in reverse alphabetical order:
+ // F, E, D, C, B, A
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_f)
+ .AddType(type_e)
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .AddType(type_a)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, testing::SizeIs(5));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
+}
+
+TEST_P(SchemaUtilTest, DependentGraphMixedOrder) {
+ // Create a schema with the following dependent relation:
+ // C
+ // / \
+ // A - B E - F
+ // \ /
+ // D
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ // Provide these in a random order: C, E, F, A, B, D
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_c)
+ .AddType(type_e)
+ .AddType(type_f)
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_d)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, testing::SizeIs(5));
+ EXPECT_THAT(
+ d_map["F"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(d_map["E"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0))))),
+ Pair("D", UnorderedElementsAre(
+ Pointee(EqualsProto(type_d.properties(0)))))));
+ EXPECT_THAT(
+ d_map["D"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(1)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0)))))));
+}
+
+TEST_P(SchemaUtilTest, TopLevelCycleIndexableTrueInvalid) {
+ // Create a schema with the following nested-type relation:
+ // A - B - B - B - B.... where all edges declare index_nested_properties=true
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, TopLevelCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
}
-};
-TEST_F(SchemaUtilTest, Valid_Empty) {
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A -(true)-> B -(false)-> B -(false)-> B....
+ // Edge B -(false)-> B breaks the invalid cycle, so this is allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(1));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0)))))));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleIndexableTrueInvalid) {
+ // Create a schema with the following dependent relation:
+ // A - B - C - A - B - C - A ...
+ // where all edges declare index_nested_properties=true
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
}
-TEST_F(SchemaUtilTest, Valid_Nested) {
- auto email_type = schema_proto_.add_types();
- *email_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+TEST_P(SchemaUtilTest, MultiLevelCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
- auto person_type = schema_proto_.add_types();
- *person_type = CreateSchemaTypeConfig(kPersonType);
+ // Create a schema with the following nested-type relation:
+ // A -(true)-> B -(false)-> C -(true)-> A -(true)-> B -(false)-> C ...
+  // B -(false)-> C breaks the infinite cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::OK));
}
-TEST_F(SchemaUtilTest, Valid_ClearedPropertyConfigs) {
- // No property fields is technically ok, but probably not realistic.
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->clear_properties();
+TEST_P(SchemaUtilTest, MultiLevelCycleDependentMapOk) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type dependent relation:
+ // A -(false)-> B -(false)-> C -(false)-> A --> B --> C ...
+ // i.e. A is a property of B
+ // B is a property of C
+ // C is a property of A
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+ EXPECT_THAT(
+ d_map["B"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()), Pair("C", IsEmpty())));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedSchemaType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->clear_schema_type();
+TEST_P(SchemaUtilTest, NestedCycleIndexableTrueInvalid) {
+ // Create a schema with the following dependent relation:
+ // A -(false)-> B <-(true)-> C -(false)-> D.
+ // B <-(true)-> C creates an invalid cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, NestedCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation:
+ // A -(true)-> B -(true)-> C -(false)-> B -(true)-> D.
+ // C -(false)-> B breaks the invalid cycle in B - C - B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["D"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(1))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(1)))))));
}
-TEST_F(SchemaUtilTest, Invalid_EmptySchemaType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->set_schema_type("");
+TEST_P(SchemaUtilTest, MultiplePathsAnyPathContainsCycleIsInvalid) {
+ // Create a schema with the following nested-type relation:
+ // C -(false)-> B -(true)-> A
+ // ^ /
+ // (true)\ /(true)
+ // \ v
+ // D
+ // There is a cycle in B-A-D-B... so this is not allowed
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, MultipleCycles_anyCycleIndexableTrueInvalid) {
+ // Create a schema with the following nested-type dependent relation:
+ // Note that the arrows in this graph shows the direction of the dependent
+ // relation, rather than nested-type relations.
+ // A -(F)-> B
+ // ^ \ |
+ // (T)| (T)\ |(T)
+ // | v v
+ // D <-(T)- C
+  // There are two cycles: A-B-C-D and A-C-D. The first cycle is allowed because
+  // A-B has nested-indexable=false, but A-C-D is invalid because every edge in
+  // it has nested-indexable=true.
+  //
+ // Schema nested-type property relation graph:
+ // A <-- B
+ // | ^ ^
+ // v \ |
+ // D --> C
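+  // (The dependent graph above is the property graph with its edges reversed:
+  // an edge X -> Y means that Y declares a document property of type X.)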
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .AddType(type_a)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, AnySchemaTypeOk) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->set_schema_type("abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好");
+TEST_P(SchemaUtilTest, CycleWithSameTypedProps_allPropsIndexableFalseIsOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A <-(true)- B <-(false)- A -(false)-> B -(true)-> A
+ // A has 2 properties with type B. A - B breaks the invalid cycle only when
+ // both properties declare index_nested_properties=false.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b2")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("A")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))),
+ Pointee(EqualsProto(type_a.properties(1))))),
+ Pair("B", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, CycleWithSameTypedProps_anyPropIndexableTrueIsInvalid) {
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A <-(true)- B <-(true)- A -(false)-> B -(true)-> A
+  // A has 2 properties with type B. Prop 'b1' declares
+  // index_nested_properties=true, so there is an invalid cycle.
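+  // (A single nested-indexable property between the same pair of types keeps
+  // the cycle alive, even if a sibling property opts out.)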
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b2")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("A")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedPropertyName) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, CycleWithJoinablePropertyNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // (joinable) B ---> C
+ // B also has a string property that is joinable on QUALIFIED_ID
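+  // Even though every edge declares index_nested_properties=false, the
+  // joinable property inside the cycle makes it invalid.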
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
- auto property = type->add_properties();
- property->clear_property_name();
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST_P(SchemaUtilTest, NonNestedJoinablePropOutsideCycleOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following dependent relation:
+ // A -(false)-> B <-(false)-> C...
+ // A has a string property that is joinable on QUALIFIED_ID, but the cycle is
+  // B-C-B, and neither B nor C depends on A, so this is fine.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, DirectNestedJoinablePropOutsideCycleNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> C ---> D(joinable)
+ // All edges have index_nested_properties=false and only D has a joinable
+ // property. The cycle A-B-C... is not allowed since there is a type in the
+ // cycle (C) which has a direct nested-type (D) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, TransitiveNestedJoinablePropOutsideCycleNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> C ---> D ---> E (joinable)
+  // All edges have index_nested_properties=false and only E has a joinable
+  // property. The cycle A-B-C... is not allowed since there is a type in the
+  // cycle (C) which has a transitive nested-type (E) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest,
+ NestedJoinablePropOutsideCycleNotAllowed_reverseIterationOrder) {
+ // Create a schema with the following dependent relation:
+ // E
+ // / ^
+ // v \
+ // D ---> C ---> B ---> A (joinable)
+  // All edges have index_nested_properties=false and only A has a joinable
+  // property. The cycle E-D-C... is not allowed since there is a type in the
+  // cycle (C) which has a transitive nested-type (A) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ComplexCycleWithJoinablePropertyNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> E
+ // / \ ^
+ // v v \
+ // C D --> F
+ //
+ // Cycles: A-B-E-A, A-B-D-F-E-A.
+ // All edges have index_nested_properties=false, but D has a joinable property
+ // so the second cycle is not allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ComplexCycleWithIndexableTrueNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> E
+ // / \ ^
+ // v v \
+ // C D --> F
+ //
+ // Cycles: A-B-E-A, A-B-D-F-E-A.
+ // B->E has index_nested_properties=false, so the first cycle is allowed.
+ // All edges on the second cycle are nested_indexable, so the second cycle is
+  // not allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_noCycle) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+  // Create a schema with the following relations and index_nested_properties
+  // definitions:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (false)| (false)/ \(false)
+ // B B C
+ // The properties in the second row are required for B and C to be
+ // compatible with their parents. index_nested_properties must be false in
+  //    these properties so that no invalid cycle can be formed by these
+  //    self-references.
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A (A is a parent of B, which is a parent of C)
+ //
+ // These two relations are separate and do not affect each other. In this
+ // case there is no cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ // Both A-B and A-C are inheritance relations.
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ // B-A and B-B are nested-type relations, B-C is both a nested-type and an
+ // inheritance relation.
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ // C-C, C-B and C-A are all nested-type relations.
+ EXPECT_THAT(d_map["C"],
+ UnorderedElementsAre(
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(1))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(1))))),
+ Pair("A", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
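+  // (InheritanceMap maps each parent type to all of its transitive children;
+  // the boolean records whether the child is a direct child.)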
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_nestedTypeCycle) {
+  // Create a schema with the following relations and index_nested_properties
+  // definitions:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (true)| (false)/ \(false)
+ // B B C
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A (A is a parent of B, which is a parent of C)
+ //
+ // These two relations are separate and do not affect each other, but there is
+ // a cycle in nested-type relations: B - B
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_inheritanceCycle) {
+  // Create a schema with the following relations and index_nested_properties
+  // definitions:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (false)| (false)/ \(false)
+ // B B C
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A -> B (A is a parent of B, which is a parent of C and A)
+ //
+ // These two relations are separate and do not affect each other, but there is
+  // a cycle in the inheritance relation: B - A - B
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("inherits from itself")));
+}
+
+TEST_P(SchemaUtilTest, NonExistentType) {
+ // Create a schema with the following dependent relation:
+ // A - B - C - X (does not exist)
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("x")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("X", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptyPropertyName) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, SingleTypeIsBothDirectAndIndirectDependent) {
+  // Create a schema with the following dependent relations, all of which are
+  // via nested documents. In this case, C is both a direct dependent and an
+  // indirect dependent of A.
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(1)))))));
+
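+  // No type declares a parent, so the inheritance map is empty.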
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SimpleInheritance) {
+ // Create a schema with the following inheritance relation:
+ // A <- B
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
- auto property = type->add_properties();
- property->set_property_name("");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(1));
+ EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
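+  // (Inheritance edges contribute no document properties, which is why B's
+  // property set is empty.)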
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(1));
+ EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, SingleInheritanceTypeIsBothDirectAndIndirectChild) {
+ // Create a schema with the following inheritance relation. In this case, C is
+ // both a direct and an indirect child of A.
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c = SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("A")
+ .AddParentType("B")
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair("C", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsTrue())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, ComplexInheritance) {
+ // Create a schema with the following inheritance relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
+ Pair("D", IsEmpty()), Pair("E", IsEmpty()),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(Pair("C", IsEmpty()), Pair("D", IsEmpty()),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair("F", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(3));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse()),
+ Pair("D", IsFalse()), Pair("E", IsTrue()),
+ Pair("F", IsFalse())));
+ EXPECT_THAT(i_map["B"],
+ UnorderedElementsAre(Pair("C", IsTrue()), Pair("D", IsTrue()),
+ Pair("F", IsFalse())));
+ EXPECT_THAT(i_map["D"], UnorderedElementsAre(Pair("F", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, InheritanceCycle) {
+ // Create a schema with the following inheritance relation:
+ // C <- A <- B <- C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("C").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, SelfInheritance) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- auto property = type->add_properties();
- property->set_property_name("_");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+TEST_P(SchemaUtilTest, NonExistentParentType) {
+ // Create a schema with the following inheritance relation:
+ // (does not exist) X <- A <- B <- C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("X").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, AlphanumericPropertyNameOk) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, SimpleInheritanceWithNestedType) {
+ // Create a schema with the following dependent relation:
+ // A - B (via inheritance)
+ // B - C (via nested document)
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("abc123");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+  // Nested-type and inheritance dependencies are not transitive across each
+  // other.
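+  // For example, A - C is absent from the map even though A - B (inheritance)
+  // and B - C (nested type) both hold.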
+ EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(0)))))));
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(1));
+ EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue())));
}
-TEST_F(SchemaUtilTest, Invalid_DuplicatePropertyName) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, ComplexInheritanceWithNestedType) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / \
+ // B E
+ // / \
+ // C D
+ // |
+ // F
+ // Approach:
+ // B extends A
+ // C extends B
+ // D has a nested document of type B
+ // E has a nested document of type A
+ // F has a nested document of type D
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
- auto first_property = type->add_properties();
- first_property->set_property_name("DuplicatedProperty");
- first_property->set_data_type(PropertyConfigProto::DataType::STRING);
- first_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
+ EXPECT_THAT(
+ d_map["B"],
+ UnorderedElementsAre(Pair("C", IsEmpty()),
+ Pair("D", UnorderedElementsAre(Pointee(
+ EqualsProto(type_d.properties(0))))),
+ Pair("F", IsEmpty())));
+ EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair(
+ "F", UnorderedElementsAre(Pointee(
+ EqualsProto(type_f.properties(0)))))));
- auto second_property = type->add_properties();
- second_property->set_property_name("DuplicatedProperty");
- second_property->set_data_type(PropertyConfigProto::DataType::STRING);
- second_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
- StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
+TEST_P(SchemaUtilTest, InheritanceWithNestedTypeCycle) {
+  // Create a schema in which A and B depend on each other, in the sense that B
+ // extends A but A has a nested document of type B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedDataType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, EmptySchemaProtoIsValid) {
+ SchemaProto schema;
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
+}
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->clear_data_type();
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+TEST_P(SchemaUtilTest, Valid_Nested) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ kPersonType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST_F(SchemaUtilTest, Invalid_UnknownDataType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
+  // Having no property fields is technically ok, but probably not realistic.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(kEmailType))
+ .Build();
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
+}
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::UNKNOWN);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+TEST_P(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder()).Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST_P(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("")).Build();
+
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedCardinality) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, AnySchemaTypeOk) {
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(
+ "abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好"))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->clear_cardinality();
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST_P(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ schema.mutable_types(0)->mutable_properties(0)->clear_property_name();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_UnknownCardinality) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::UNKNOWN);
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST_P(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a_b")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedPropertySchemaType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, AlphanumericPropertyNameOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("abc123")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->clear_schema_type();
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST_P(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("DuplicatedProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("DuplicatedProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
+}
+
+TEST_P(SchemaUtilTest, ClearedDataTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ schema.mutable_types(0)->mutable_properties(0)->clear_data_type();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, UnknownDataTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(PropertyConfigProto::DataType::UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type("");
+TEST_P(SchemaUtilTest, ClearedCardinalityIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ schema.mutable_types(0)->mutable_properties(0)->clear_cardinality();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST_P(SchemaUtilTest, UnknownCardinalityIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_UNKNOWN)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_NoMatchingSchemaType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST_P(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_DOCUMENT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest, EmptyPropertySchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeDocument(
+ /*schema_type=*/"",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type("NewSchemaType");
+TEST_P(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeDocument(
+ /*schema_type=*/"NewSchemaType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
- StatusIs(libtextclassifier3::StatusCode::UNKNOWN));
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Undefined 'schema_type'")));
}
-TEST_F(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
+TEST_P(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
// Configure new schema with an optional field, not considered incompatible
// since it's fine if old data doesn't have this optional field
- SchemaProto new_schema_with_optional;
- type = new_schema_with_optional.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewOptional");
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto new_schema_with_optional =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewOptional")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
- new_schema_with_optional),
+ schema_delta.schema_types_changed_fully_compatible.insert(kEmailType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema_with_optional, no_dependents_map),
Eq(schema_delta));
}
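+
+// A minimal sketch of the SchemaDelta fields exercised by the tests in this
+// file; the semantics below are inferred from the assertions here, not from
+// the struct's own documentation:
+//
+//   SchemaUtil::SchemaDelta delta;
+//   delta.schema_types_new;           // types that only exist in new_schema
+//   delta.schema_types_deleted;       // types dropped by new_schema
+//   delta.schema_types_incompatible;  // existing documents may no longer
+//                                     // validate against the type
+//   delta.schema_types_index_incompatible;  // the type's indexed sections
+//                                           // changed
+//   delta.schema_types_join_incompatible;   // the type's joinable config
+//                                           // changed
+//   delta.schema_types_changed_fully_compatible;  // changed, nothing breaks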
-TEST_F(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
// Configure new schema with a required field, considered incompatible since
// old data won't have this required field
- SchemaProto new_schema_with_required;
- type = new_schema_with_required.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewRequired");
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto new_schema_with_required =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewRequired")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
- new_schema_with_required),
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema_with_required, no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("OldOptional");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("OldOptional")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Configure new schema; the new schema needs to at least have all the
// previously defined properties
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
+TEST_P(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// Configure less restrictive schema based on cardinality
- SchemaProto less_restrictive_schema;
- auto type = less_restrictive_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto less_restrictive_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Configure more restrictive schema based on cardinality
- SchemaProto more_restrictive_schema;
- type = more_restrictive_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto more_restrictive_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // We can't have a new schema be less restrictive, REQUIRED->OPTIONAL
+ // We can't have a new schema be more restrictive, REPEATED->OPTIONAL
SchemaUtil::SchemaDelta incompatible_schema_delta;
incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType);
+ SchemaUtil::DependentMap no_dependents_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/less_restrictive_schema,
- /*new_schema=*/more_restrictive_schema),
+ /*new_schema=*/more_restrictive_schema, no_dependents_map),
Eq(incompatible_schema_delta));
- // We can have the new schema be more restrictive, OPTIONAL->REPEATED;
+  // We can have the new schema be less restrictive, OPTIONAL->REPEATED.
SchemaUtil::SchemaDelta compatible_schema_delta;
+ compatible_schema_delta.schema_types_changed_fully_compatible.insert(
+ kEmailType);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/more_restrictive_schema,
- /*new_schema=*/less_restrictive_schema),
+ /*new_schema=*/less_restrictive_schema, no_dependents_map),
Eq(compatible_schema_delta));
}
-TEST_F(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
+TEST_P(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
// Configure old schema, with an int64_t property
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Configure new schema, with a double property
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
+TEST_P(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
// Configure old schema, where Property is supposed to be a Person type
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
-
- *type = CreateSchemaTypeConfig(kEmailType);
- auto property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type(kPersonType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kMessageType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeDocument(
+ kPersonType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Configure new schema, where Property is supposed to be an Email type
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
-
- *type = CreateSchemaTypeConfig(kEmailType);
- property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type(kEmailType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kMessageType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeDocument(
+ kMessageType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ // kEmailType depends on kMessageType
+ SchemaUtil::DependentMap dependents_map = {
+ {kMessageType, {{kEmailType, {}}}}};
+ SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+ EXPECT_THAT(actual.schema_types_incompatible,
+ testing::ElementsAre(kEmailType));
+ EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty());
+}
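+
+// A minimal sketch of the DependentMap shape used throughout this file: each
+// key is a schema type, and each value maps the types that embed it as a
+// document property to a payload that these tests always leave empty ({}),
+// so its contents are not exercised here.
+//
+//   // kEmailType is embedded by kPersonType, so changes to kEmailType can
+//   // ripple into kPersonType:
+//   SchemaUtil::DependentMap dependents_map = {
+//       {kEmailType, {{kPersonType, {}}}}};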
+
+TEST_P(SchemaUtilTest, SameNumberOfRequiredFieldsCanBeIncompatible) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ // Changing required to optional should be fine
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataType(TYPE_STRING)
+ // Adding a new required property is incompatible
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, /*new_schema_dependent_map=*/{});
+ EXPECT_THAT(delta.schema_types_incompatible,
+ testing::ElementsAre(kEmailType));
+ EXPECT_THAT(delta.schema_types_index_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_deleted, testing::IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SameNumberOfIndexedPropertiesCanMakeIndexIncompatible) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, /*new_schema_dependent_map=*/{});
+ EXPECT_THAT(delta.schema_types_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_index_incompatible,
+ testing::ElementsAre(kEmailType));
+ EXPECT_THAT(delta.schema_types_deleted, testing::IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SameNumberOfJoinablePropertiesCanMakeJoinIncompatible) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta delta = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, /*new_schema_dependent_map=*/{});
+ EXPECT_THAT(delta.schema_types_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_index_incompatible, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_deleted, testing::IsEmpty());
+ EXPECT_THAT(delta.schema_types_join_incompatible,
+ testing::ElementsAre(kEmailType));
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
+ // Configure old schema
+ SchemaProto schema_with_indexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto schema_with_unindexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+
+ // New schema gained a new indexed string property.
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_unindexed_property, schema_with_indexed_property,
+ no_dependents_map),
+ Eq(schema_delta));
+
+ // New schema lost an indexed string property.
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_indexed_property, schema_with_unindexed_property,
+ no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto old_type = old_schema.add_types();
- *old_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- auto old_property = old_type->add_properties();
- old_property->set_property_name("Property");
- old_property->set_data_type(PropertyConfigProto::DataType::STRING);
- old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewIndexedProperty")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
+ Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Configure new schema
- SchemaProto new_schema;
- auto new_type = new_schema.add_types();
- *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- auto new_property = new_type->add_properties();
- new_property->set_property_name("Property");
- new_property->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_index_incompatible,
+ IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) {
+ // Configure old schema
+ SchemaProto schema_with_indexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto schema_with_unindexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
- schema_delta.index_incompatible = true;
-
- // New schema gained a new indexed property.
- old_property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::UNKNOWN);
- new_property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+
+ // New schema gained a new indexed integer property.
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_unindexed_property, schema_with_indexed_property,
+ no_dependents_map),
Eq(schema_delta));
- // New schema lost an indexed property.
- old_property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- new_property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::UNKNOWN);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ // New schema lost an indexed integer property.
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_indexed_property, schema_with_unindexed_property,
+ no_dependents_map),
Eq(schema_delta));
}
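+
+// The PropertyConfigBuilder shorthands used in this file, side by side. This
+// is a sketch of the call shapes that appear above, not the builder's full
+// API:
+//
+//   .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)   // indexed string
+//   .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)  // unindexed
+//   .SetDataTypeInt64(NUMERIC_MATCH_RANGE)                  // indexed int64
+//   .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)                // unindexed int64
+//   .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)  // joinable
+//   .SetDataTypeDocument(kEmailType, /*index_nested_properties=*/true)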
-TEST_F(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto old_type = old_schema.add_types();
- *old_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
-
- auto old_property = old_type->add_properties();
- old_property->set_property_name("Property");
- old_property->set_data_type(PropertyConfigProto::DataType::STRING);
- old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Configure new schema
- SchemaProto new_schema;
- auto new_type = new_schema.add_types();
- *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
-
- auto new_property = new_type->add_properties();
- new_property->set_property_name("Property");
- new_property->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- new_property = new_type->add_properties();
- new_property->set_property_name("NewIndexedProperty");
- new_property->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewIndexedProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
- schema_delta.index_incompatible = true;
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, AddingTypeIsCompatible) {
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_index_incompatible,
+ IsEmpty());
+}
+
+TEST_P(SchemaUtilTest,
+ AddingNewIndexedDocumentPropertyMakesIndexAndJoinIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(
+ SchemaUtilTest,
+ AddingNewIndexedDocumentPropertyWithIndexableListMakesIndexAndJoinIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema. The added nested document property is indexed, so
+ // this is both index and join incompatible
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"subject"})
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedDocumentPropertyMakesJoinIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema. The added nested document property is not indexed, so
+ // this is index compatible, but join incompatible
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, DeletingIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+  // Configure old schema with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and drop one of the nested document properties
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, DeletingNonIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+  // Configure old schema with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and drop the non-indexed nested document property
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+  // Configure old schema with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and change one of the nested document properties
+ // to a different name (this is the same as deleting a property and adding
+ // another)
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("DifferentEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingNonIndexedDocumentPropertyIsIncompatible) {
+ SchemaTypeConfigProto nested_schema =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+  // Configure old schema with two nested document properties of the same type
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("AnotherEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema and change the non-indexed nested document property to
+ // a different name (this is the same as deleting a property and adding
+ // another)
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(nested_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("EmailProperty")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("DifferentEmailProperty")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.insert(kPersonType);
+ schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta result_schema_delta =
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ dependents_map);
+ EXPECT_THAT(result_schema_delta, Eq(schema_delta));
+}
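+
+// Summary of the document-property cases covered above, as asserted by the
+// expected deltas (kPersonType is the affected type in every case):
+//
+//   change to the document property   | incompatible | index | join
+//   ----------------------------------+--------------+-------+------
+//   added, nested indexing on         |              |   x   |  x
+//   added, nested indexing off        |              |       |  x
+//   deleted or renamed, indexing on   |      x       |   x   |  x
+//   deleted or renamed, indexing off  |      x       |       |  x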
+
+TEST_P(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
+ // Configure old schema
+ SchemaProto schema_with_joinable_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto schema_with_non_joinable_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta expected_schema_delta;
+ expected_schema_delta.schema_types_join_incompatible.insert(kPersonType);
+
+ // New schema gained a new joinable property.
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_non_joinable_property,
+ schema_with_joinable_property, no_dependents_map),
+ Eq(expected_schema_delta));
+
+ // New schema lost a joinable property.
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_joinable_property,
+ schema_with_non_joinable_property, no_dependents_map),
+ Eq(expected_schema_delta));
+}
+
+TEST_P(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewJoinableProperty")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta expected_schema_delta;
+ expected_schema_delta.schema_types_join_incompatible.insert(kPersonType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
+ Eq(expected_schema_delta));
+}
+
+TEST_P(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) {
+ // Configure old schema
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("JoinableProperty")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Configure new schema
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("JoinableProperty")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map)
+ .schema_types_join_incompatible,
+ IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, AddingTypeIsCompatible) {
// Can add a new type; existing data isn't incompatible, since none of it
// is of this new schema type
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ schema_delta.schema_types_new.insert(kEmailType);
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, DeletingTypeIsNoted) {
+TEST_P(SchemaUtilTest, DeletingTypeIsNoted) {
// Can't remove an old type; the new schema needs to at least have all the
// previously defined schema types, otherwise the documents of the missing
// schema type are invalid
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_deleted.emplace(kPersonType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ SchemaUtil::DependentMap no_dependents_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependents_map),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, ValidateNoTokenizer) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("Foo");
- prop->set_data_type(PropertyConfigProto::DataType::STRING);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::Validate(schema),
+TEST_P(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Remove Property2 and make Property1 indexed now. Removing Property2 should
+ // be incompatible.
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.emplace(kEmailType);
+ schema_delta.schema_types_index_incompatible.emplace(kEmailType);
+ SchemaUtil::DependentMap no_dependents_map;
+ SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, no_dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
+ // Make two schemas. One that sets index_nested_properties to false and one
+ // that sets it to true.
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto no_nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Going from index_nested_properties=false to index_nested_properties=true
+ // should make kPersonType index_incompatible. kEmailType should be
+ // unaffected.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
+ no_nested_index_schema, nested_index_schema, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+
+ // Going from index_nested_properties=true to index_nested_properties=false
+ // should also make kPersonType index_incompatible. kEmailType should be
+ // unaffected.
+ actual = SchemaUtil::ComputeCompatibilityDelta(
+ nested_index_schema, no_nested_index_schema, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
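+
+// SetDataTypeDocument appears in two forms in this file (a sketch of the
+// call shapes only): a boolean that toggles indexing of all nested
+// properties, and an explicit list of nested property paths to index.
+//
+//   .SetDataTypeDocument(kEmailType, /*index_nested_properties=*/true)
+//   .SetDataTypeDocument(kEmailType,
+//                        /*indexable_nested_properties_list=*/
+//                        {"recipient", "subject"})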
+
+TEST_P(SchemaUtilTest, AddOrDropIndexableNestedProperties_IndexIncompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "subject", "body"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+                                /*indexable_nested_properties_list=*/
+ {"recipient", "subject"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Dropping some indexable_nested_properties should make kPersonType
+ // index_incompatible. kEmailType should be unaffected.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+
+ // Adding some indexable_nested_properties should also make kPersonType
+ // index_incompatible. kEmailType should be unaffected.
+ actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_2, schema_1, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, ChangingIndexableNestedProperties_IndexIncompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "subject"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "body"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Changing 'subject' to 'body' for indexable_nested_properties_list should
+ // make kPersonType index_incompatible. kEmailType should be unaffected.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest, IndexableNestedPropertiesFullSet_IndexIncompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "body", "subject"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+  // This scenario currently also invalidates kPersonType and triggers an
+  // index rebuild, even though the effective set of indexable nested
+  // properties is identical in schema_1 and schema_2.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST_P(SchemaUtilTest,
+ ChangingIndexableNestedPropertiesOrder_IndexIsCompatible) {
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema_1 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"recipient", "subject", "body"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto schema_2 =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*indexable_nested_properties_list=*/
+ {"subject", "body", "recipient"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Changing order of elements in indexable_nested_properties_list should have
+ // no effect on schema compatibility.
+ SchemaUtil::SchemaDelta schema_delta;
+ SchemaUtil::DependentMap dependents_map = {{kEmailType, {{kPersonType, {}}}}};
+ SchemaUtil::SchemaDelta actual =
+ SchemaUtil::ComputeCompatibilityDelta(schema_1, schema_2, dependents_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+ EXPECT_THAT(actual.schema_types_index_incompatible, IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if we don't set a term match type
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ // Passes once we set a term match type
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST_F(SchemaUtilTest, ValidateDocumentNoTokenizer) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("OtherType");
+
+TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if we don't set a tokenizer type
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set a tokenizer type
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_INT64)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Error if data type is not STRING for qualified id joinable value type.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set STRING as the data type.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
- type = schema.add_types();
- type->set_schema_type("MyType");
+
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
- auto* prop = type->add_properties();
- prop->set_property_name("SubType");
- prop->set_schema_type("OtherType");
- prop->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::NONE);
+
+  // Error if using REPEATED cardinality for a joinable property.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // Passes once we use OPTIONAL cardinality with a joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+  // Passes once we use REQUIRED cardinality with a joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+  // Passes once we use REPEATED cardinality with a non-joinable property.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+  // Error if delete propagation is enabled with a non-qualified-id joinable
+  // value type.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // Passes once we set the qualified id joinable value type with delete
+  // propagation enabled.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+ // Passes once we disable delete propagation.
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) {
+ // Dependency and nested document property cardinality:
+ // "C" --(REPEATED)--> "B" --(OPTIONAL)--> "A"
+ // where "A" contains joinable property. This should not be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use non-REPEATED cardinality for "C.b", i.e. the dependency
+ // and nested document property cardinality becomes:
+ // "C" --(OPTIONAL)--> "B" --(OPTIONAL)--> "A"
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(
+ SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldAllowRepeatedCardinalityIfNoJoinableProperty) {
+ // Dependency and nested document property cardinality:
+ // "C" --(OPTIONAL)--> "B" --(REPEATED)--> "A"
+ // where only "B" contains joinable property. This should be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_NONE,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Bar")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument("B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+  // Passes since the nested schema type reachable through the REPEATED edge
+  // ("A") doesn't have a joinable property.
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) {
+ // Dependency and nested document property cardinality:
+ // --(a1: OPTIONAL)--
+ // / \
+ // B -- --> A
+ // \ /
+ // --(a2: REPEATED)--
+ // where "A" contains joinable property. This should not be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a1")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a2")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we use non-REPEATED cardinality for "B.a2", i.e. the dependency
+ // and nested document property cardinality becomes:
+ // --(a1: OPTIONAL)--
+ // / \
+ // B -- --> A
+ // \ /
+ // --(a2: OPTIONAL)--
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a1")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a2")
+ .SetDataTypeDocument(
+ "A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
+ // Dependency and nested document property cardinality:
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ // where "A" contains joinable property. This should be allowed.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+
+  // Fails once we change any edge to REPEATED cardinality.
+ // B
+ // / \
+ // (REPEATED) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (REPEATED)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (REPEATED) (OPTIONAL)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // B
+ // / \
+ // (OPTIONAL) (OPTIONAL)
+ // / \
+ // D --- --> A
+ // \ /
+ // (OPTIONAL) (REPEATED)
+ // \ /
+ // C
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataType(TYPE_STRING)
+ .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+ /*propagate_delete=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("C").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeDocument("A",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("b")
+ .SetDataTypeDocument(
+ "B",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("c")
+ .SetDataTypeDocument(
+ "C",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
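+
+// Taken together, the nested joinable-property tests above exercise a single
+// rule: a type that defines a joinable property must not be reachable through
+// any chain of nested document properties that contains a REPEATED edge, no
+// matter which edge of the chain is the repeated one.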
+
+TEST_P(SchemaUtilTest,
+ ValidDocumentIndexingConfigFields_emptyIndexableListBooleanTrue) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->clear_indexable_nested_properties_list();
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidDocumentIndexingConfigFields_emptyIndexableListBooleanFalse) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->clear_indexable_nested_properties_list();
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest,
+ ValidDocumentIndexingConfigFields_nonEmptyIndexableList) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*indexable_nested_properties_list=*/
+ std::initializer_list<std::string>{"prop1"})
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->set_index_nested_properties(false);
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
+}
+
+TEST_P(SchemaUtilTest, InvalidDocumentIndexingConfigFields) {
+  // If indexable_nested_properties_list is non-empty, index_nested_properties
+  // is required to be false.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("InnerSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Setting a non-empty indexable_nested_properties_list while
+ // index_nested_properties=true is invalid.
+ SchemaTypeConfigProto* outerSchemaType = schema.mutable_types(1);
+ outerSchemaType->mutable_properties(0)
+ ->mutable_document_indexing_config()
+ ->add_indexable_nested_properties_list("prop");
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_P(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("InnerSchema"))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OuterSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty1")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("InnerProperty2")
+ .SetDataTypeDocument(
+ "InnerSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
+
+TEST_P(SchemaUtilTest, InvalidSelfReference) {
+ // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OwnSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "OwnSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
+ // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OwnSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "OwnSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("SomeString")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
+  // Create the outer schema type
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ // Reference schema B, so far so good
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ // Create the inner schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ // Reference the schema A, causing an invalid cycle of
+ // references.
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "A", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Two degrees of referencing: A -> B -> A
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
+ SchemaProto schema =
+ SchemaBuilder()
+          // Create the outer schema type
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ // Reference schema B, so far so good
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ // Create the inner schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ // Reference schema C, so far so good
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "C", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+          // Create another inner schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+                  // Reference schema A, causing an invalid cycle of
+                  // references.
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "A", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Three degrees of referencing: A -> B -> C -> A
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ChildMissingOptionalAndRepeatedPropertiesNotOk) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property text is not present in child type")));
+}
+
+TEST_P(SchemaUtilTest, ChildMissingRequiredPropertyNotOk) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property text is not present in child type")));
+}
+
+TEST_P(SchemaUtilTest, ChildCompatiblePropertyOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ // OPTIONAL is compatible with REPEATED.
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+              // An extra text property is compatible.
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+              // An extra double property is compatible.
+ PropertyConfigBuilder()
+ .SetName("extraDouble")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOUBLE))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ // REQUIRED is compatible with OPTIONAL.
+ .SetCardinality(CARDINALITY_REQUIRED)
+ // Artist is compatible with Person.
+ .SetDataTypeDocument(
+ "Artist", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["Message"],
+ UnorderedElementsAre(Pair("ArtistMessage", IsEmpty())));
+ EXPECT_THAT(d_map["Person"],
+ UnorderedElementsAre(
+ Pair("Message", UnorderedElementsAre(Pointee(EqualsProto(
+ message_type.properties(1))))),
+ Pair("Artist", IsEmpty())));
+ EXPECT_THAT(d_map["Artist"],
+ UnorderedElementsAre(Pair(
+ "ArtistMessage", UnorderedElementsAre(Pointee(EqualsProto(
+ artist_message_type.properties(3)))))));
+}
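+
+// Note how the two kinds of dependency edges show up above: parent-child
+// edges (Message -> ArtistMessage, Person -> Artist) carry an empty property
+// set, while document-property edges (Person -> Message, Artist ->
+// ArtistMessage) carry pointers to the referencing PropertyConfigProto.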
+
+TEST_P(SchemaUtilTest, ChildIncompatibleCardinalityPropertyNotOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+                           // Overwriting OPTIONAL with REPEATED is not ok.
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeDocument(
+ "Artist", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property person from child type ArtistMessage is not "
+ "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleDataTypePropertyNotOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+                       // Double is not compatible with string.
+ .SetDataType(TYPE_DOUBLE))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeDocument(
+ "Artist", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property text from child type ArtistMessage is not "
+ "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleDocumentTypePropertyNotOk) {
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto artist_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("ArtistMessage")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("extraText")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("person")
+ .SetCardinality(CARDINALITY_REQUIRED)
+                  // Artist is not a subtype of Person, thus incompatible.
+ .SetDataTypeDocument("Artist",
+ /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaTypeConfigProto person_type =
+ SchemaTypeConfigBuilder().SetType("Person").Build();
+ // In this test, Artist is not a subtype of Person.
+ SchemaTypeConfigProto artist_type =
+ SchemaTypeConfigBuilder().SetType("Artist").Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(message_type)
+ .AddType(artist_message_type)
+ .AddType(person_type)
+ .AddType(artist_type)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Property person from child type ArtistMessage is not "
+ "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildCompatibleMultipleParentPropertyOk) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto email_message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(email_type)
+ .AddType(message_type)
+ .AddType(email_message_type)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["Email"],
+ UnorderedElementsAre(Pair("EmailMessage", IsEmpty())));
+ EXPECT_THAT(d_map["Message"],
+ UnorderedElementsAre(Pair("EmailMessage", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleMultipleParentPropertyNotOk) {
+ SchemaTypeConfigProto email_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto message_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ // Missing the "sender" field from parent "Email", thus incompatible.
+ SchemaTypeConfigProto email_message_type1 =
+ SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema1 = SchemaBuilder()
+ .AddType(email_type)
+ .AddType(message_type)
+ .AddType(email_message_type1)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema1, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "Property sender is not present in child type EmailMessage, "
+ "but it is defined in the parent type Email.")));
+
+ // Missing the "content" field from parent "Message", thus incompatible.
+ SchemaTypeConfigProto email_message_type2 =
+ SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema2 = SchemaBuilder()
+ .AddType(email_type)
+ .AddType(message_type)
+ .AddType(email_message_type2)
+ .Build();
+ EXPECT_THAT(
+ SchemaUtil::Validate(schema2, GetParam()),
+ StatusIs(
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "Property content is not present in child type EmailMessage, "
+ "but it is defined in the parent type Message.")));
+}
+
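+// Every SchemaUtilTest case above runs twice: GetParam() supplies
+// allow_circular_schema_definitions=true on one run and false on the other,
+// and the tests that call SchemaUtil::Validate() forward it as that argument.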
+INSTANTIATE_TEST_SUITE_P(
+ SchemaUtilTest, SchemaUtilTest,
+ testing::Values(/*allow_circular_schema_definitions=*/true, false));
+
+struct IsIndexedPropertyTestParam {
+ PropertyConfigProto property_config;
+ bool expected_result;
+
+ explicit IsIndexedPropertyTestParam(PropertyConfigProto property_config_in,
+ bool expected_result_in)
+ : property_config(std::move(property_config_in)),
+ expected_result(expected_result_in) {}
+};
+
+class SchemaUtilIsIndexedPropertyTest
+ : public ::testing::TestWithParam<IsIndexedPropertyTestParam> {};
+
+TEST_P(SchemaUtilIsIndexedPropertyTest, IsIndexedProperty) {
+ const IsIndexedPropertyTestParam& test_param = GetParam();
+ EXPECT_THAT(SchemaUtil::IsIndexedProperty(test_param.property_config),
+ Eq(test_param.expected_result));
+}
+
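+// The parameter table below encodes the expected rule: a string property is
+// indexed only when it has both a term match type other than UNKNOWN and a
+// tokenizer other than NONE; an int64 property is indexed only with
+// NUMERIC_MATCH_RANGE; and double, boolean, bytes, and document properties
+// are never indexed directly.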
+INSTANTIATE_TEST_SUITE_P(
+ SchemaUtilIsIndexedPropertyTest, SchemaUtilIsIndexedPropertyTest,
+ testing::Values(
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_NONE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_PLAIN)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_VERBATIM)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_RFC822)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN,
+ TOKENIZER_URL)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_NONE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_RFC822)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_URL)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_NONE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_RFC822)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_URL)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .Build(),
+ true),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .Build(),
+ false),
+ IsIndexedPropertyTestParam(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOCUMENT)
+ .Build(),
+ false)));
+
} // namespace
} // namespace lib
diff --git a/icing/schema/section-manager-builder_test.cc b/icing/schema/section-manager-builder_test.cc
new file mode 100644
index 0000000..1d452d5
--- /dev/null
+++ b/icing/schema/section-manager-builder_test.cc
@@ -0,0 +1,345 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+
+class SectionManagerBuilderTest : public ::testing::Test {
+ protected:
+ void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::string test_dir_;
+};
+
+TEST_F(SectionManagerBuilderTest, Build) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeTwo", 1));
+
+ PropertyConfigProto prop_foo =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ PropertyConfigProto prop_bar =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ PropertyConfigProto prop_baz =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, prop_bar, /*property_path=*/"bar"));
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/1, prop_baz, /*property_path=*/"baz"));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ // Check "typeOne"
+ EXPECT_THAT(
+ section_manager->GetMetadataList("typeOne"),
+ IsOkAndHolds(Pointee(ElementsAre(
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"foo", prop_foo),
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"bar", prop_bar)))));
+ // Check "typeTwo"
+ EXPECT_THAT(section_manager->GetMetadataList("typeTwo"),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"baz", prop_baz)))));
+}
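+
+// As the expectations above show, section ids are assigned within each schema
+// type in the order its properties are processed, starting again from 0 for
+// each type.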
+
+TEST_F(SectionManagerBuilderTest, TooManyPropertiesShouldFail) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("type", 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ // Add kTotalNumSections indexable properties
+ for (int i = 0; i < kTotalNumSections; i++) {
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"property" + std::to_string(i)));
+ }
+
+ // Add another indexable property. This should fail.
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+ EXPECT_THAT(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config,
+ /*property_path=*/"propertyExceed"),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+}
+
+TEST_F(SectionManagerBuilderTest, InvalidSchemaTypeIdShouldFail) {
+ // Create a valid schema type mapper; the invalid (negative) schema type id
+ // is passed to the builder below.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("type", 0));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/-1, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerBuilderTest,
+ SchemaTypeIdInconsistentWithSchemaTypeMapperSizeShouldFail) {
+ // Create a schema type mapper that contains schema type id = 2, even though
+ // the mapper's size is only 2. Since SectionManager::Builder expects the 2
+ // schema type ids to be [0, 1], processing a config with schema type id = 2
+ // should fail even though id = 2 is present in the schema type mapper.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeOne", 0));
+ ICING_ASSERT_OK(schema_type_mapper->Put("typeTwo", 2));
+
+ PropertyConfigProto property_config =
+ PropertyConfigBuilder()
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ EXPECT_THAT(
+ builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/2, property_config, /*property_path=*/"property"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+class IndexableSectionManagerBuilderTest
+ : public SectionManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(IndexableSectionManagerBuilderTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ EXPECT_THAT(section_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following types are considered indexable:
+// - String with valid TermMatchType and TokenizerType
+// - Int64 with valid NumericMatchType
+INSTANTIATE_TEST_SUITE_P(
+ IndexableSectionManagerBuilderTest, IndexableSectionManagerBuilderTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_VERBATIM)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+class NonIndexableSectionManagerBuilderTest
+ : public SectionManagerBuilderTest,
+ public ::testing::WithParamInterface<PropertyConfigProto> {};
+
+TEST_P(NonIndexableSectionManagerBuilderTest, Build) {
+ static constexpr std::string_view kSchemaType = "type";
+ static constexpr std::string_view kPropertyPath = "foo.bar";
+ const PropertyConfigProto& property_config = GetParam();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_ + "/schema_type_mapper",
+ /*maximum_size_bytes=*/3 * 128 * 1024));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kSchemaType, 0));
+
+ SectionManager::Builder builder(*schema_type_mapper);
+ ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
+ /*schema_type_id=*/0, property_config, std::string(kPropertyPath)));
+
+ // Non-indexable sections still consume a sectionId.
+ std::unique_ptr<SectionManager> section_manager = std::move(builder).Build();
+ EXPECT_THAT(section_manager->GetMetadataList(std::string(kSchemaType)),
+ IsOkAndHolds(Pointee(ElementsAre(EqualsSectionMetadata(
+ /*expected_id=*/0, kPropertyPath, property_config)))));
+}
+
+// The following types are considered non-indexable:
+// - String with TERM_MATCH_UNKNOWN or TOKENIZER_NONE
+// - Int64 with NUMERIC_MATCH_UNKNOWN
+// - Double
+// - Boolean
+// - Bytes
+// - Document
+INSTANTIATE_TEST_SUITE_P(
+ NonIndexableSectionManagerBuilderTest,
+ NonIndexableSectionManagerBuilderTest,
+ testing::Values(PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BOOLEAN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build(),
+ PropertyConfigBuilder()
+ .SetName("property")
+ .SetDataTypeDocument("anotherSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build()));
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
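
A condensed sketch of the happy path these tests exercise (illustrative only,
not part of the patch; names mirror SectionManagerBuilderTest.Build above, and
error handling is elided):

    // Assumes schema_type_mapper already contains {"typeOne" -> 0} and that
    // prop_foo was built with PropertyConfigBuilder as in the test above.
    SectionManager::Builder builder(*schema_type_mapper);
    ICING_ASSERT_OK(builder.ProcessSchemaTypePropertyConfig(
        /*schema_type_id=*/0, prop_foo, /*property_path=*/"foo"));
    std::unique_ptr<SectionManager> section_manager =
        std::move(builder).Build();
    // "foo" received SectionId 0 (ids are assigned in processing order), so
    // it is retrievable via section_manager->GetMetadataList("typeOne").
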
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 6a10c9a..3d540d6 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -15,27 +15,20 @@
#include "icing/schema/section-manager.h"
#include <algorithm>
-#include <cinttypes>
-#include <cstddef>
#include <cstdint>
-#include <iterator>
-#include <memory>
#include <string>
#include <string_view>
-#include <unordered_map>
-#include <unordered_set>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
-#include "icing/schema/schema-util.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -43,300 +36,85 @@
namespace icing {
namespace lib {
-namespace {
-
-using TypeSectionMap =
- std::unordered_map<std::string, const std::vector<SectionMetadata>>;
-
-// This state helps detect infinite loops (e.g. two type configs referencing
-// each other) when assigning sections. The combination of 'number of section
-// assigned' and 'current schema name' represents a unique state in the
-// section-assign process. If the same state is seen the second time, that means
-// an infinite loop.
-struct SectionAssigningState {
- size_t num_sections_assigned;
- std::string current_schema_name;
-
- SectionAssigningState(size_t num_sections_assigned_in,
- std::string&& current_schema_name_in)
- : num_sections_assigned(num_sections_assigned_in),
- current_schema_name(std::move(current_schema_name_in)) {}
-};
-
-// Provides a hash value of this struct so that it can be stored in a hash
-// set.
-struct SectionAssigningStateHasher {
- size_t operator()(const SectionAssigningState& state) const {
- size_t str_hash = std::hash<std::string>()(state.current_schema_name);
- size_t int_hash = std::hash<size_t>()(state.num_sections_assigned);
- // Combine the two hashes by taking the upper 16-bits of the string hash and
- // the lower 16-bits of the int hash.
- return (str_hash & 0xFFFF0000) | (int_hash & 0x0000FFFF);
- }
-};
-
-bool operator==(const SectionAssigningState& lhs,
- const SectionAssigningState& rhs) {
- return lhs.num_sections_assigned == rhs.num_sections_assigned &&
- lhs.current_schema_name == rhs.current_schema_name;
-}
-// Helper function to concatenate a path and a property name
-std::string ConcatenatePath(const std::string& path,
- const std::string& next_property_name) {
- if (path.empty()) {
- return next_property_name;
- }
- return absl_ports::StrCat(path, kPropertySeparator, next_property_name);
-}
+namespace {
-// Helper function to recursively identify sections from a type config and add
-// them to a section metadata list
-libtextclassifier3::Status AssignSections(
- const SchemaTypeConfigProto& type_config,
- const std::string& current_section_path,
- const SchemaUtil::TypeConfigMap& type_config_map,
- std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>*
- visited_states,
- std::vector<SectionMetadata>* metadata_list) {
- if (!visited_states
- ->emplace(metadata_list->size(),
- std::string(type_config.schema_type()))
- .second) {
- // Failed to insert, the same state has been seen before, there's an
- // infinite loop in type configs
- return absl_ports::InvalidArgumentError(
- "Infinite loop detected in type configs");
+// Helper function to append a new SectionMetadata to the metadata list
+libtextclassifier3::Status AppendNewSectionMetadata(
+ std::vector<SectionMetadata>* metadata_list,
+ std::string&& concatenated_path,
+ const PropertyConfigProto& property_config) {
+ // Validates the next section id and makes sure that it is the same as the
+ // list index, so that any section metadata can be found by id in O(1) later.
+ SectionId new_section_id = static_cast<SectionId>(metadata_list->size());
+ if (!IsSectionIdValid(new_section_id)) {
+ // Max number of sections reached
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Too many properties to be indexed, max number of properties "
+ "allowed: %d",
+ kMaxSectionId - kMinSectionId + 1));
}
- // Sorts properties by name's alphabetical order so that order doesn't affect
- // section assigning.
- auto sorted_properties = type_config.properties();
- std::sort(sorted_properties.pointer_begin(), sorted_properties.pointer_end(),
- [](const PropertyConfigProto* p1, const PropertyConfigProto* p2) {
- return p1->property_name() < p2->property_name();
- });
- for (const auto& property_config : sorted_properties) {
- if (property_config.indexing_config().term_match_type() ==
- TermMatchType::UNKNOWN) {
- // No need to create section for current property
- continue;
- }
-
- // Creates section metadata according to data type
- if (property_config.data_type() == PropertyConfigProto::DataType::STRING ||
- property_config.data_type() == PropertyConfigProto::DataType::INT64 ||
- property_config.data_type() == PropertyConfigProto::DataType::DOUBLE) {
- // Validates next section id, makes sure that section id is the same as
- // the list index so that we could find any section metadata by id in O(1)
- // later.
- auto new_section_id = static_cast<SectionId>(metadata_list->size());
- if (!IsSectionIdValid(new_section_id)) {
- // Max number of sections reached
- return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Too many properties to be indexed, max number of properties "
- "allowed: %d",
- kMaxSectionId - kMinSectionId + 1));
- }
- // Creates section metadata from property config
- metadata_list->emplace_back(
- new_section_id, property_config.indexing_config().term_match_type(),
- property_config.indexing_config().tokenizer_type(),
- ConcatenatePath(current_section_path,
- property_config.property_name()));
- } else if (property_config.data_type() ==
- PropertyConfigProto::DataType::DOCUMENT) {
- // Tries to find sections recursively
- auto nested_type_config_iter =
- type_config_map.find(property_config.schema_type());
- if (nested_type_config_iter == type_config_map.end()) {
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "type config not found: ", property_config.schema_type()));
- }
- const SchemaTypeConfigProto& nested_type_config =
- nested_type_config_iter->second;
- ICING_RETURN_IF_ERROR(
- AssignSections(nested_type_config,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- type_config_map, visited_states, metadata_list));
- }
- // NOTE: we don't create sections for BOOLEAN and BYTES data types.
- }
+ // Creates section metadata
+ metadata_list->push_back(SectionMetadata(
+ new_section_id, property_config.data_type(),
+ property_config.string_indexing_config().tokenizer_type(),
+ property_config.string_indexing_config().term_match_type(),
+ property_config.integer_indexing_config().numeric_match_type(),
+ std::move(concatenated_path)));
return libtextclassifier3::Status::OK;
}
-// Builds a vector of vectors that holds SectionMetadatas for all the schema
-// types. The outer vector's index corresponds with a type's SchemaTypeId. The
-// inner vector's index corresponds to the section's SectionId.
-libtextclassifier3::StatusOr<std::vector<std::vector<SectionMetadata>>>
-BuildSectionMetadataCache(const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>& schema_type_mapper) {
- // Create our vector and reserve the number of schema types we have
- std::vector<std::vector<SectionMetadata>> section_metadata_cache(
- schema_type_mapper.num_keys());
-
- std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>
- visited_states;
- for (const auto& name_and_type : type_config_map) {
- // Assigns sections for each type config
- visited_states.clear();
- const std::string& type_config_name = name_and_type.first;
- const SchemaTypeConfigProto& type_config = name_and_type.second;
- std::vector<SectionMetadata> metadata_list;
- ICING_RETURN_IF_ERROR(
- AssignSections(type_config, /*current_section_path*/ "",
- type_config_map, &visited_states, &metadata_list));
-
- // Insert the section metadata list at the index of the type's SchemaTypeId
- ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
- schema_type_mapper.Get(type_config_name));
- section_metadata_cache[schema_type_id] = std::move(metadata_list);
+template <typename T>
+void AppendSection(
+ SectionMetadata section_metadata,
+ libtextclassifier3::StatusOr<std::vector<T>>&& section_content_or,
+ std::vector<Section<T>>& sections_out) {
+ if (!section_content_or.ok()) {
+ return;
}
- return section_metadata_cache;
-}
-// Helper function to get string content from a property. Repeated values are
-// joined into one string. We only care about STRING, INT64, and DOUBLE data
-// types.
-std::vector<std::string> GetPropertyContent(const PropertyProto& property) {
- std::vector<std::string> values;
- if (!property.string_values().empty()) {
- std::copy(property.string_values().begin(), property.string_values().end(),
- std::back_inserter(values));
- } else if (!property.int64_values().empty()) {
- std::transform(
- property.int64_values().begin(), property.int64_values().end(),
- std::back_inserter(values),
- [](int64_t i) { return IcingStringUtil::StringPrintf("%" PRId64, i); });
- } else {
- std::transform(
- property.double_values().begin(), property.double_values().end(),
- std::back_inserter(values),
- [](double d) { return IcingStringUtil::StringPrintf("%f", d); });
+ std::vector<T> section_content = std::move(section_content_or).ValueOrDie();
+ if (!section_content.empty()) {
+ // Adds to result vector if section is found in document
+ sections_out.emplace_back(std::move(section_metadata),
+ std::move(section_content));
}
- return values;
-}
-
-// Helper function to get metadata list of a type config
-libtextclassifier3::StatusOr<std::vector<SectionMetadata>> GetMetadataList(
- const KeyMapper<SchemaTypeId>& schema_type_mapper,
- const std::vector<std::vector<SectionMetadata>>& section_metadata_cache,
- const std::string& type_config_name) {
- ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
- schema_type_mapper.Get(type_config_name));
- return section_metadata_cache.at(schema_type_id);
}
} // namespace
-SectionManager::SectionManager(
- const KeyMapper<SchemaTypeId>* schema_type_mapper,
- std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
- : schema_type_mapper_(*schema_type_mapper),
- section_metadata_cache_(std::move(section_metadata_cache)) {}
-
-libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>>
-SectionManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>* schema_type_mapper) {
- ICING_RETURN_ERROR_IF_NULL(schema_type_mapper);
-
- ICING_ASSIGN_OR_RETURN(
- std::vector<std::vector<SectionMetadata>> section_metadata_cache,
- BuildSectionMetadataCache(type_config_map, *schema_type_mapper));
- return std::unique_ptr<SectionManager>(new SectionManager(
- schema_type_mapper, std::move(section_metadata_cache)));
-}
-
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SectionManager::GetSectionContent(const DocumentProto& document,
- std::string_view section_path) const {
- // Finds the first property name in section_path
- size_t separator_position = section_path.find(kPropertySeparator);
- std::string_view current_property_name =
- (separator_position == std::string::npos)
- ? section_path
- : section_path.substr(0, separator_position);
-
- // Tries to match the property name with the ones in document
- auto property_iterator =
- std::find_if(document.properties().begin(), document.properties().end(),
- [current_property_name](const PropertyProto& property) {
- return property.name() == current_property_name;
- });
-
- if (property_iterator == document.properties().end()) {
- // Property name not found, it could be one of the following 2 cases:
- // 1. The property is optional and it's not in the document
- // 2. The property name is invalid
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
- }
-
- if (separator_position == std::string::npos) {
- // Current property name is the last one in section path
- std::vector<std::string> content = GetPropertyContent(*property_iterator);
- if (content.empty()) {
- // The content of property is explicitly set to empty, we'll treat it as
- // NOT_FOUND because the index doesn't care about empty strings.
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
- }
- return content;
- }
-
- // Gets section content recursively
- std::string_view sub_section_path =
- section_path.substr(separator_position + 1);
- std::vector<std::string> nested_document_content;
- for (const auto& nested_document : property_iterator->document_values()) {
- auto content_or = GetSectionContent(nested_document, sub_section_path);
- if (content_or.ok()) {
- std::vector<std::string> content = std::move(content_or).ValueOrDie();
- std::move(content.begin(), content.end(),
- std::back_inserter(nested_document_content));
- }
+libtextclassifier3::Status
+SectionManager::Builder::ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path) {
+ if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
}
- if (nested_document_content.empty()) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
- }
- return nested_document_content;
-}
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SectionManager::GetSectionContent(const DocumentProto& document,
- SectionId section_id) const {
- if (!IsSectionIdValid(section_id)) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Section id %d is greater than the max value %d", section_id,
- kMaxSectionId));
- }
- ICING_ASSIGN_OR_RETURN(
- const std::vector<SectionMetadata>& metadata_list,
- GetMetadataList(schema_type_mapper_, section_metadata_cache_,
- document.schema()));
- if (section_id >= metadata_list.size()) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Section with id %d doesn't exist in type config %s", section_id,
- document.schema().c_str()));
- }
- // The index of metadata list is the same as the section id, so we can use
- // section id as the index.
- return GetSectionContent(document, metadata_list[section_id].path);
+ // We don't need to check whether the property is indexable. This method is
+ // only called for properties that should consume sectionIds, even if the
+ // property's indexing configuration itself is not indexable.
+ // This would be the case for unknown and non-indexable property paths that
+ // are defined in the indexable_nested_properties_list.
+ ICING_RETURN_IF_ERROR(
+ AppendNewSectionMetadata(&section_metadata_cache_[schema_type_id],
+ std::move(property_path), property_config));
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::StatusOr<const SectionMetadata*>
SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
+ if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ }
if (!IsSectionIdValid(section_id)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Section id %d is greater than the max value %d", section_id,
kMaxSectionId));
}
+
const std::vector<SectionMetadata>& section_metadatas =
section_metadata_cache_[schema_type_id];
if (section_id >= section_metadatas.size()) {
@@ -350,23 +128,54 @@ SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
return &section_metadatas[section_id];
}
-libtextclassifier3::StatusOr<std::vector<Section>>
-SectionManager::ExtractSections(const DocumentProto& document) const {
- ICING_ASSIGN_OR_RETURN(
- const std::vector<SectionMetadata>& metadata_list,
- GetMetadataList(schema_type_mapper_, section_metadata_cache_,
- document.schema()));
- std::vector<Section> sections;
- for (const auto& section_metadata : metadata_list) {
- auto section_content_or =
- GetSectionContent(document, section_metadata.path);
- // Adds to result vector if section is found in document
- if (section_content_or.ok()) {
- sections.emplace_back(SectionMetadata(section_metadata),
- std::move(section_content_or).ValueOrDie());
+libtextclassifier3::StatusOr<SectionGroup> SectionManager::ExtractSections(
+ const DocumentProto& document) const {
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
+ SectionGroup section_group;
+ for (const SectionMetadata& section_metadata : *metadata_list) {
+ switch (section_metadata.data_type) {
+ case PropertyConfigProto::DataType::STRING: {
+ if (section_metadata.term_match_type == TermMatchType::UNKNOWN ||
+ section_metadata.tokenizer ==
+ StringIndexingConfig::TokenizerType::NONE) {
+ // Skip if the term match type is UNKNOWN, or if the tokenizer type is
+ // NONE.
+ break;
+ }
+ AppendSection(
+ section_metadata,
+ property_util::ExtractPropertyValuesFromDocument<std::string_view>(
+ document, section_metadata.path),
+ section_group.string_sections);
+ break;
+ }
+ case PropertyConfigProto::DataType::INT64: {
+ if (section_metadata.numeric_match_type ==
+ IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
+ // Skip if the numeric match type is UNKNOWN.
+ break;
+ }
+ AppendSection(section_metadata,
+ property_util::ExtractPropertyValuesFromDocument<int64_t>(
+ document, section_metadata.path),
+ section_group.integer_sections);
+ break;
+ }
+ default: {
+ // Skip other data types.
+ break;
+ }
}
}
- return sections;
+ return section_group;
+}
+
+libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+SectionManager::GetMetadataList(const std::string& type_config_name) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper_.Get(type_config_name));
+ return &section_metadata_cache_.at(schema_type_id);
}
} // namespace lib
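
With ExtractSections now returning a SectionGroup rather than a flat vector,
callers iterate the per-type vectors separately. A minimal consumption sketch,
assuming the Section<T> shape used above (a SectionMetadata plus a content
vector); 'section_manager' and 'document' are assumed to be in scope and error
handling is elided:

    ICING_ASSIGN_OR_RETURN(SectionGroup section_group,
                           section_manager->ExtractSections(document));
    for (const Section<std::string_view>& section :
         section_group.string_sections) {
      // section.metadata.id and section.metadata.path identify the section;
      // section.content holds the extracted string values.
    }
    for (const Section<int64_t>& section : section_group.integer_sections) {
      // section.content holds the extracted int64 values, e.g. timestamps.
    }
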
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
index 475fa6a..6241dc0 100644
--- a/icing/schema/section-manager.h
+++ b/icing/schema/section-manager.h
@@ -22,7 +22,6 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/document.pb.h"
-#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
@@ -30,49 +29,55 @@
namespace icing {
namespace lib {
-inline constexpr char kPropertySeparator[] = ".";
-
// This class provides section-related operations. It assigns sections
// according to type configs and extracts sections from documents.
+// The actual instance is created together with JoinablePropertyManager, and
+// both of them are wrapped in SchemaTypeManager.
+//
+// Note: SectionManager assumes schema type ids are consecutive integers
+// starting from 0, so it maintains a vector with size
+// schema_type_mapper_->num_keys() that maps schema type id to a list (2nd level
+// vector) of SectionMetadatas. Therefore, all schema type ids stored in
+// schema_type_mapper_ must be in range [0, schema_type_mapper_->num_keys() - 1]
+// and unique.
class SectionManager {
public:
+ // Builder class to create a SectionManager. The SectionManager does not take
+ // ownership of any input components; all referenced objects must remain
+ // valid for the lifetime of the created SectionManager instance.
+ class Builder {
+ public:
+ explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
+ : schema_type_mapper_(schema_type_mapper),
+ section_metadata_cache_(schema_type_mapper.num_keys()) {}
+
+ // Checks the schema type id and appends a new SectionMetadata for the given
+ // property config. The property config does not have to be indexable; the
+ // caller decides which properties consume sectionIds.
+ //
+ // Returns:
+ // - OK on success
+ // - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
+ // schema_type_mapper_.num_keys() - 1])
+ // - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema
+ // exceeds the threshold (kTotalNumSections)
+ libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
+ SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
+ std::string&& property_path);
+
+ // Builds and returns a SectionManager instance.
+ std::unique_ptr<SectionManager> Build() && {
+ return std::unique_ptr<SectionManager>(new SectionManager(
+ schema_type_mapper_, std::move(section_metadata_cache_)));
+ }
+
+ private:
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own.
+ std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
+ };
+
SectionManager(const SectionManager&) = delete;
SectionManager& operator=(const SectionManager&) = delete;
- // Factory function to create a SectionManager which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created SectionManager instance.
- //
- // Returns:
- // A SectionManager on success
- // FAILED_PRECONDITION on any null pointer input
- // INVALID_ARGUMENT if infinite loop detected in the type configs
- // OUT_OF_RANGE if number of properties need indexing exceeds the max number
- // NOT_FOUND if any type config name not found in the map
- static libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>> Create(
- const SchemaUtil::TypeConfigMap& type_config_map,
- const KeyMapper<SchemaTypeId>* schema_type_mapper);
-
- // Finds content of a section by section path (e.g. property1.property2)
- //
- // Returns:
- // A string of content on success
- // NOT_FOUND if:
- // 1. Property is optional and not found in the document
- // 2. section_path is invalid
- // 3. Content is empty
- libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
- const DocumentProto& document, std::string_view section_path) const;
-
- // Finds content of a section by id
- //
- // Returns:
- // A string of content on success
- // INVALID_ARGUMENT if section id is invalid
- // NOT_FOUND if type config name of document not found
- libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
- const DocumentProto& document, SectionId section_id) const;
-
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
@@ -82,24 +87,34 @@ class SectionManager {
libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
SchemaTypeId schema_type_id, SectionId section_id) const;
- // Extracts all sections from the given document, sections are sorted by
- // section id in increasing order. Section ids start from 0. Sections with
- // empty content won't be returned.
+ // Extracts all sections of different types from the given document and
+ // groups them by type.
+ // - Sections are sorted by section id in ascending order.
+ // - Section ids start from 0.
+ // - Sections with empty content won't be returned.
//
// Returns:
- // A list of sections on success
- // NOT_FOUND if type config name of document not found
- libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
+ // A SectionGroup instance on success
+ // NOT_FOUND if the type config name of the document is not present in
+ // schema_type_mapper_
+ libtextclassifier3::StatusOr<SectionGroup> ExtractSections(
const DocumentProto& document) const;
+ // Returns:
+ // - On success, the section metadatas for the specified type
+ // - NOT_FOUND if the type config name is not present in schema_type_mapper_
+ libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+ GetMetadataList(const std::string& type_config_name) const;
+
private:
- // Use SectionManager::Create() to instantiate
explicit SectionManager(
- const KeyMapper<SchemaTypeId>* schema_type_mapper,
- std::vector<std::vector<SectionMetadata>>&& section_metadata_cache);
+ const KeyMapper<SchemaTypeId>& schema_type_mapper,
+ std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
+ : schema_type_mapper_(schema_type_mapper),
+ section_metadata_cache_(std::move(section_metadata_cache)) {}
// Maps schema types to a densely-assigned unique id.
- const KeyMapper<SchemaTypeId>& schema_type_mapper_;
+ const KeyMapper<SchemaTypeId>& schema_type_mapper_; // Does not own
// The index of section_metadata_cache_ corresponds to a schema type's
// SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The
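
To make the consecutive-id invariant above concrete, a hypothetical lookup
helper (illustrative only; it mirrors what GetSectionMetadata/GetMetadataList
do, assuming dense schema type ids in [0, num_keys() - 1] and a section_id
already validated against the inner vector's size):

    libtextclassifier3::StatusOr<const SectionMetadata*> LookupMetadata(
        const KeyMapper<SchemaTypeId>& schema_type_mapper,
        const std::vector<std::vector<SectionMetadata>>& cache,
        const std::string& type_config_name, SectionId section_id) {
      // Schema type ids double as indices into the outer vector, and section
      // ids double as indices into the inner vector, giving O(1) lookups.
      ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
                             schema_type_mapper.Get(type_config_name));
      return &cache[schema_type_id][section_id];
    }
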
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 9e73465..eee78e9 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -14,432 +14,1012 @@
#include "icing/schema/section-manager.h"
-#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/key-mapper.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
+
+namespace {
+
using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::HasSubstr;
-
-// type and property names of EmailMessage
-constexpr char kTypeEmail[] = "EmailMessage";
-constexpr char kPropertySubject[] = "subject";
-constexpr char kPropertyText[] = "text";
-constexpr char kPropertyTimestamp[] = "timestamp";
-constexpr char kPropertyAttachment[] = "attachment";
-constexpr char kPropertyRecipients[] = "recipients";
+using ::testing::IsEmpty;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+// type and property names of Email
+static constexpr std::string_view kTypeEmail = "Email";
+// indexable
+static constexpr std::string_view kPropertyRecipientIds = "recipientIds";
+static constexpr std::string_view kPropertyRecipients = "recipients";
+static constexpr std::string_view kPropertySubject = "subject";
+static constexpr std::string_view kPropertyTimestamp = "timestamp";
+// non-indexable
+static constexpr std::string_view kPropertyAttachment = "attachment";
+static constexpr std::string_view kPropertyNonIndexableInteger =
+ "nonIndexableInteger";
+static constexpr std::string_view kPropertyText = "text";
+
// type and property names of Conversation
-constexpr char kTypeConversation[] = "Conversation";
-constexpr char kPropertyName[] = "name";
-constexpr char kPropertyEmails[] = "emails";
+static constexpr std::string_view kTypeConversation = "Conversation";
+// indexable
+static constexpr std::string_view kPropertyEmails = "emails";
+static constexpr std::string_view kPropertyName = "name";
+
+// type and property names of Group
+static constexpr std::string_view kTypeGroup = "Group";
+// indexable
+static constexpr std::string_view kPropertyConversation = "conversation";
+static constexpr std::string_view kPropertyGroupName = "groupName";
+// nested indexable
+static constexpr std::string_view kPropertyNestedConversationName = "name";
+static constexpr std::string_view kPropertyNestedConversationEmailRecipientIds =
+ "emails.recipientIds";
+static constexpr std::string_view kPropertyNestedConversationEmailRecipient =
+ "emails.recipients";
+static constexpr std::string_view kPropertyNestedConversationEmailSubject =
+ "emails.subject";
+// nested non-indexable
+static constexpr std::string_view kPropertyNestedConversationEmailAttachment =
+ "emails.attachment";
+// non-existent property path
+static constexpr std::string_view kPropertyNestedNonExistent =
+ "emails.nonExistentNestedProperty";
+static constexpr std::string_view kPropertyNestedNonExistent2 =
+ "emails.nonExistentNestedProperty2";
+
+constexpr int64_t kDefaultTimestamp = 1663274901;
+
+PropertyConfigProto CreateRecipientIdsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipientIds)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateRecipientsPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)
+ .Build();
+}
+
+PropertyConfigProto CreateSubjectPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateTimestampPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyTimestamp)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .Build();
+}
+
+PropertyConfigProto CreateNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateAttachmentPropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+PropertyConfigProto CreateGroupNamePropertyConfig() {
+ return PropertyConfigBuilder()
+ .SetName(kPropertyGroupName)
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+}
+
+SchemaTypeConfigProto CreateEmailTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(CreateSubjectPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyAttachment)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(CreateRecipientsPropertyConfig())
+ .AddProperty(CreateRecipientIdsPropertyConfig())
+ .AddProperty(CreateTimestampPropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNonIndexableInteger)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateConversationTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(CreateNamePropertyConfig())
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(kTypeEmail,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
+
+SchemaTypeConfigProto CreateGroupTypeConfig() {
+ return SchemaTypeConfigBuilder()
+ .SetType(kTypeGroup)
+ .AddProperty(CreateGroupNamePropertyConfig())
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyConversation)
+ .SetDataTypeDocument(
+ kTypeConversation,
+ /*indexable_nested_properties_list=*/
+ {std::string(kPropertyNestedConversationName),
+ std::string(kPropertyNestedConversationEmailRecipientIds),
+ std::string(kPropertyNestedConversationEmailSubject),
+ std::string(kPropertyNestedConversationEmailRecipient),
+ std::string(kPropertyNestedConversationEmailAttachment),
+ std::string(kPropertyNestedNonExistent2),
+ std::string(kPropertyNestedNonExistent),
+ std::string(kPropertyNestedNonExistent)})
+ .SetCardinality(CARDINALITY_REPEATED))
+ .Build();
+}
class SectionManagerTest : public ::testing::Test {
protected:
- SectionManagerTest() : test_dir_(GetTestTempDir() + "/icing") {
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+
auto email_type = CreateEmailTypeConfig();
auto conversation_type = CreateConversationTypeConfig();
+ auto group_type = CreateGroupTypeConfig();
type_config_map_.emplace(email_type.schema_type(), email_type);
type_config_map_.emplace(conversation_type.schema_type(),
conversation_type);
+ type_config_map_.emplace(group_type.schema_type(), group_type);
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each
+ // one 128KiB, so the total DynamicTrieKeyMapper should get 384KiB.
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeGroup, 2));
email_document_ =
DocumentBuilder()
.SetKey("icing", "email/1")
- .SetSchema(kTypeEmail)
- .AddStringProperty(kPropertySubject, "the subject")
- .AddStringProperty(kPropertyText, "the text")
- .AddInt64Property(kPropertyTimestamp, 1234567890)
- .AddBytesProperty(kPropertyAttachment, "attachment bytes")
- .AddStringProperty(kPropertyRecipients, "recipient1", "recipient2",
- "recipient3")
+ .SetSchema(std::string(kTypeEmail))
+ .AddStringProperty(std::string(kPropertySubject), "the subject")
+ .AddStringProperty(std::string(kPropertyText), "the text")
+ .AddBytesProperty(std::string(kPropertyAttachment),
+ "attachment bytes")
+ .AddStringProperty(std::string(kPropertyRecipients), "recipient1",
+ "recipient2", "recipient3")
+ .AddInt64Property(std::string(kPropertyRecipientIds), 1, 2, 3)
+ .AddInt64Property(std::string(kPropertyTimestamp),
+ kDefaultTimestamp)
+ .AddInt64Property(std::string(kPropertyNonIndexableInteger), 100)
.Build();
conversation_document_ =
DocumentBuilder()
.SetKey("icing", "conversation/1")
- .SetSchema(kTypeConversation)
- .AddDocumentProperty(kPropertyEmails,
+ .SetSchema(std::string(kTypeConversation))
+ .AddDocumentProperty(std::string(kPropertyEmails),
DocumentProto(email_document_),
DocumentProto(email_document_))
.Build();
- }
- void SetUp() override {
- // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
- // the total KeyMapper should get 384KiB
- int key_mapper_size = 3 * 128 * 1024;
- ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
- KeyMapper<SchemaTypeId>::Create(
- filesystem_, test_dir_, key_mapper_size));
- ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
- ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
- }
-
- static SchemaTypeConfigProto CreateEmailTypeConfig() {
- SchemaTypeConfigProto type;
- type.set_schema_type(kTypeEmail);
-
- auto subject = type.add_properties();
- subject->set_property_name(kPropertySubject);
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- subject->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- subject->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- auto text = type.add_properties();
- text->set_property_name(kPropertyText);
- text->set_data_type(PropertyConfigProto::DataType::STRING);
- text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- text->mutable_indexing_config()->set_term_match_type(
- TermMatchType::UNKNOWN);
-
- auto timestamp = type.add_properties();
- timestamp->set_property_name(kPropertyTimestamp);
- timestamp->set_data_type(PropertyConfigProto::DataType::INT64);
- timestamp->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- timestamp->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- timestamp->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- auto attachment = type.add_properties();
- attachment->set_property_name(kPropertyAttachment);
- attachment->set_data_type(PropertyConfigProto::DataType::BYTES);
- attachment->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- attachment->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- attachment->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- auto recipients = type.add_properties();
- recipients->set_property_name(kPropertyRecipients);
- recipients->set_data_type(PropertyConfigProto::DataType::STRING);
- recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- recipients->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- recipients->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- return type;
+ group_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "group/1")
+ .SetSchema(std::string(kTypeGroup))
+ .AddDocumentProperty(std::string(kPropertyConversation),
+ DocumentProto(conversation_document_))
+ .AddStringProperty(std::string(kPropertyGroupName), "group_name_1")
+ .Build();
}
- static SchemaTypeConfigProto CreateConversationTypeConfig() {
- SchemaTypeConfigProto type;
- type.set_schema_type(kTypeConversation);
-
- auto name = type.add_properties();
- name->set_property_name(kPropertyName);
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- name->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
-
- auto emails = type.add_properties();
- emails->set_property_name(kPropertyEmails);
- emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- emails->set_schema_type(kTypeEmail);
- emails->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
-
- return type;
+ void TearDown() override {
+ schema_type_mapper_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
Filesystem filesystem_;
- const std::string test_dir_;
+ std::string test_dir_;
SchemaUtil::TypeConfigMap type_config_map_;
std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
DocumentProto email_document_;
DocumentProto conversation_document_;
+ DocumentProto group_document_;
};
-TEST_F(SectionManagerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- SectionManager::Create(type_config_map_, /*schema_type_mapper=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
+TEST_F(SectionManagerTest, ExtractSections) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Extracts all sections from 'Email' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(email_document_));
-TEST_F(SectionManagerTest, CreationWithSchemaInfiniteLoopShouldFail) {
- // Creates 2 type configs that reference each other
- SchemaTypeConfigProto type_config1;
- type_config1.set_schema_type("type1");
- auto property1 = type_config1.add_properties();
- property1->set_property_name("property1");
- property1->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property1->set_schema_type("type2"); // Here we reference type2
- property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
-
- SchemaTypeConfigProto type_config2;
- type_config2.set_schema_type("type2");
- auto property2 = type_config2.add_properties();
- property2->set_property_name("property2");
- property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- // Here we reference type1, which references type2 causing the infinite loop
- property2->set_schema_type("type1");
- property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property2->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+ // String sections
+ EXPECT_THAT(section_group.string_sections, SizeIs(2));
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type1", type_config1);
- type_config_map.emplace("type2", type_config2);
+ EXPECT_THAT(section_group.string_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[0].content,
+ ElementsAre("recipient1", "recipient2", "recipient3"));
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop detected")));
-}
+ EXPECT_THAT(section_group.string_sections[1].metadata,
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"subject",
+ CreateSubjectPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[1].content,
+ ElementsAre("the subject"));
-TEST_F(SectionManagerTest, CreationWithSchemaSelfReferenceShouldFail) {
- // Creates a type config that has a section and references to self.
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- auto property1 = type_config.add_properties();
- property1->set_property_name("property1");
- property1->set_data_type(PropertyConfigProto::DataType::STRING);
- property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- auto property2 = type_config.add_properties();
- property2->set_property_name("property2");
- property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- // Here we're referencing our own type, causing an infinite loop
- property2->set_schema_type("type");
- property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property2->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+ // Integer sections
+ EXPECT_THAT(section_group.integer_sections, SizeIs(2));
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
+ EXPECT_THAT(section_group.integer_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()));
+ EXPECT_THAT(section_group.integer_sections[0].content, ElementsAre(1, 2, 3));
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
- HasSubstr("Too many properties")));
+ EXPECT_THAT(section_group.integer_sections[1].metadata,
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"timestamp",
+ CreateTimestampPropertyConfig()));
+ EXPECT_THAT(section_group.integer_sections[1].content,
+ ElementsAre(kDefaultTimestamp));
}
-TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- // Adds more properties than allowed
- int max_num_sections_allowed = kMaxSectionId - kMinSectionId + 1;
- for (int i = 0; i < max_num_sections_allowed + 1; i++) {
- auto property = type_config.add_properties();
- property->set_property_name("property" + std::to_string(i));
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- }
+TEST_F(SectionManagerTest, ExtractSectionsNested) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
+ // Extracts all sections from 'Conversation' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(
+ conversation_document_));
+
+ // String sections
+ EXPECT_THAT(section_group.string_sections, SizeIs(2));
EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
- HasSubstr("Too many properties")));
-}
+ section_group.string_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/1,
+ /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[0].content,
+ ElementsAre("recipient1", "recipient2", "recipient3",
+ "recipient1", "recipient2", "recipient3"));
-TEST_F(SectionManagerTest, CreationWithUnknownSchemaTypeNameShouldFail) {
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- auto property = type_config.add_properties();
- property->set_property_name("property");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("unknown_name");
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(section_group.string_sections[1].metadata,
+ EqualsSectionMetadata(/*expected_id=*/2,
+ /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[1].content,
+ ElementsAre("the subject", "the subject"));
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
+ // Integer sections
+ EXPECT_THAT(section_group.integer_sections, SizeIs(2));
EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
- HasSubstr("type config not found")));
+ section_group.integer_sections[0].metadata,
+ EqualsSectionMetadata(/*expected_id=*/0,
+ /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()));
+ EXPECT_THAT(section_group.integer_sections[0].content,
+ ElementsAre(1, 2, 3, 1, 2, 3));
+
+ EXPECT_THAT(
+ section_group.integer_sections[1].metadata,
+ EqualsSectionMetadata(/*expected_id=*/3,
+ /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()));
+ EXPECT_THAT(section_group.integer_sections[1].content,
+ ElementsAre(kDefaultTimestamp, kDefaultTimestamp));
}
-TEST_F(SectionManagerTest, GetSectionContent) {
+TEST_F(SectionManagerTest, ExtractSectionsIndexableNestedPropertiesList) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
-
- // Test simple section paths
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "subject"),
- IsOkAndHolds(ElementsAre("the subject")));
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "text"),
- IsOkAndHolds(ElementsAre("the text")));
-
- // Test repeated values, they are joined into one string
- ICING_ASSERT_OK_AND_ASSIGN(auto content, section_manager->GetSectionContent(
- email_document_,
- /*section_path*/ "recipients"));
- EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3"));
-
- // Test concatenated section paths: "property1.property2"
- ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetSectionContent(
- conversation_document_,
- /*section_path*/ "emails.subject"));
- EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
-
- ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetSectionContent(
- conversation_document_,
- /*section_path*/ "emails.text"));
- EXPECT_THAT(content, ElementsAre("the text", "the text"));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ // Extracts all sections from 'Group' document
ICING_ASSERT_OK_AND_ASSIGN(
- content,
- section_manager->GetSectionContent(conversation_document_,
- /*section_path*/ "emails.recipients"));
- EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3"));
-
- // Test non-existing paths
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "name"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "invalid"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(
- section_manager->GetSectionContent(conversation_document_,
- /*section_path*/ "emails.invalid"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-
- // Test other data types
- // INT64
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "timestamp"),
- IsOkAndHolds(ElementsAre("1234567890")));
- // BYTES type can't be indexed, so content won't be returned
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "attachment"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-
- // The following tests are similar to the ones above but use section ids
- // instead of section paths
-
- // EmailMessage (section id -> section path):
- SectionId recipients_section_id = 0;
- SectionId subject_section_id = 1;
- SectionId timestamp_section_id = 2;
- SectionId invalid_email_section_id = 3;
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(group_document_));
+
+ // SectionId assignments:
+ // 0 -> conversation.emails.attachment (bytes, non-indexable)
+ // 1 -> conversation.emails.recipientIds (int64)
+ // 2 -> conversation.emails.recipients (string)
+ // 3 -> conversation.emails.subject (string)
+ // 4 -> conversation.name
+ // (string, but no entry for this in conversation_document_)
+ // 5 -> groupName (string)
+ // 6 -> conversation.emails.nonExistentNestedProperty
+ // (unknown, non-indexable)
+ // 7 -> conversation.emails.nonExistentNestedProperty2
+ // (unknown, non-indexable)
+ //
+ // SectionId assignment order:
+ // - We assign section ids to known (existing) properties first, in
+ //   alphabetical order.
+ // - After handling all known properties, we assign section ids to all
+ //   unknown (non-existent) properties that are specified in the
+ //   indexable_nested_properties_list.
+ // - As a result, assignment across the entire section set is not
+ //   alphabetical, but it is still deterministic, and alphabetical order is
+ //   preserved within the known-property and unknown-property sets
+ //   individually. (A short sketch of this scheme follows this test.)
+ //
+ // 'conversation.emails.attachment',
+ // 'conversation.emails.nonExistentNestedProperty' and
+ // 'conversation.emails.nonExistentNestedProperty2' are assigned sectionIds
+ // even though they are non-indexable, because they appear in the 'Group'
+ // schema type's indexable_nested_properties_list.
+ // However, 'conversation.emails.attachment' does not appear in section_group
+ // (even though the property exists and has a sectionId assignment), because
+ // SectionManager::ExtractSections only extracts indexable string and integer
+ // section data from a document.
+
+ // String sections
+ EXPECT_THAT(section_group.string_sections, SizeIs(3));
+
+ EXPECT_THAT(section_group.string_sections[0].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"conversation.emails.recipients",
+ CreateRecipientsPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[0].content,
+ ElementsAre("recipient1", "recipient2", "recipient3",
+ "recipient1", "recipient2", "recipient3"));
+
+ EXPECT_THAT(section_group.string_sections[1].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"conversation.emails.subject",
+ CreateSubjectPropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[1].content,
+ ElementsAre("the subject", "the subject"));
+
+ EXPECT_THAT(section_group.string_sections[2].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/5,
+ /*expected_property_path=*/"groupName",
+ CreateGroupNamePropertyConfig()));
+ EXPECT_THAT(section_group.string_sections[2].content,
+ ElementsAre("group_name_1"));
+
+ // Integer sections
+ EXPECT_THAT(section_group.integer_sections, SizeIs(1));
+
+ EXPECT_THAT(section_group.integer_sections[0].metadata,
+ EqualsSectionMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"conversation.emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()));
+ EXPECT_THAT(section_group.integer_sections[0].content,
+ ElementsAre(1, 2, 3, 1, 2, 3));
+}
+
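The assignment order described above amounts to a two-pass sort. Below is a minimal illustrative sketch, not code from this change: 'known' and 'unknown' are hypothetical inputs holding property paths, and the index into the returned vector plays the role of the assigned SectionId.

#include <algorithm>
#include <string>
#include <vector>

// Sketch only: known (existing) properties take the lowest section ids, in
// alphabetical order; unknown properties from the
// indexable_nested_properties_list follow, also sorted among themselves.
std::vector<std::string> AssignSectionPaths(std::vector<std::string> known,
                                            std::vector<std::string> unknown) {
  std::sort(known.begin(), known.end());
  std::sort(unknown.begin(), unknown.end());
  known.insert(known.end(), unknown.begin(), unknown.end());
  return known;  // The index in this vector corresponds to the SectionId.
}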
+TEST_F(SectionManagerTest, GetSectionMetadata) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- content, section_manager->GetSectionContent(email_document_,
- recipients_section_id));
- EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3"));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Email (section id -> section property path):
+ // 0 -> recipientIds
+ // 1 -> recipients
+ // 2 -> subject
+ // 3 -> timestamp
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"recipients",
+ CreateRecipientsPropertyConfig()))));
+ // Conversation (section id -> section property path):
+ // 0 -> emails.recipientIds
+ // 1 -> emails.recipients
+ // 2 -> emails.subject
+ // 3 -> emails.timestamp
+ // 4 -> name
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0, /*expected_property_path=*/"emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1, /*expected_property_path=*/"emails.recipients",
+ CreateRecipientsPropertyConfig()))));
EXPECT_THAT(
- section_manager->GetSectionContent(email_document_, subject_section_id),
- IsOkAndHolds(ElementsAre("the subject")));
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/2),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/2, /*expected_property_path=*/"emails.subject",
+ CreateSubjectPropertyConfig()))));
EXPECT_THAT(
- section_manager->GetSectionContent(email_document_, timestamp_section_id),
- IsOkAndHolds(ElementsAre("1234567890")));
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/3),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/3, /*expected_property_path=*/"emails.timestamp",
+ CreateTimestampPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/4),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/4, /*expected_property_path=*/"name",
+ CreateNamePropertyConfig()))));
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- invalid_email_section_id),
+ // Group (section id -> section property path):
+ // 0 -> conversation.emails.attachment (non-indexable)
+ // 1 -> conversation.emails.recipientIds
+ // 2 -> conversation.emails.recipients
+ // 3 -> conversation.emails.subject
+ // 4 -> conversation.name
+ // 5 -> groupName
+ // 6 -> conversation.emails.nonExistentNestedProperty (non-indexable)
+ // 7 -> conversation.emails.nonExistentNestedProperty2 (non-indexable)
+ //
+ // SectionId assignment order:
+ // - We assign section ids to known (existing) properties first, in
+ //   alphabetical order.
+ // - After handling all known properties, we assign section ids to all
+ //   unknown (non-existent) properties that are specified in the
+ //   indexable_nested_properties_list.
+ // - As a result, assignment across the entire section set is not
+ //   alphabetical, but it is still deterministic, and alphabetical order is
+ //   preserved within the known-property and unknown-property sets
+ //   individually.
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/0),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/0,
+ /*expected_property_path=*/"conversation.emails.attachment",
+ CreateAttachmentPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/1),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/1,
+ /*expected_property_path=*/"conversation.emails.recipientIds",
+ CreateRecipientIdsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/2),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/2,
+ /*expected_property_path=*/"conversation.emails.recipients",
+ CreateRecipientsPropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/3),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/3,
+ /*expected_property_path=*/"conversation.emails.subject",
+ CreateSubjectPropertyConfig()))));
+ EXPECT_THAT(
+ schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/4),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/4, /*expected_property_path=*/"conversation.name",
+ CreateNamePropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/5),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/5, /*expected_property_path=*/"groupName",
+ CreateGroupNamePropertyConfig()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/6),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/6,
+ /*expected_property_path=*/
+ "conversation.emails.nonExistentNestedProperty",
+ PropertyConfigBuilder()
+ .SetName("nonExistentNestedProperty")
+ .SetDataType(TYPE_UNKNOWN)
+ .Build()))));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/7),
+ IsOkAndHolds(Pointee(EqualsSectionMetadata(
+ /*expected_id=*/7,
+ /*expected_property_path=*/
+ "conversation.emails.nonExistentNestedProperty2",
+ PropertyConfigBuilder()
+ .SetName("nonExistentNestedProperty2")
+ .SetDataType(TYPE_UNKNOWN)
+ .Build()))));
+ // Check that no more properties are indexed
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/2, /*section_id=*/8),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
- // Conversation (section id -> section path):
- // 0 -> emails.recipients
- // 1 -> emails.subject
- // 2 -> emails.timestamp
- // 3 -> name
- SectionId emails_recipients_section_id = 0;
- SectionId emails_subject_section_id = 1;
- SectionId emails_timestamp_section_id = 2;
- SectionId name_section_id = 3;
- SectionId invalid_conversation_section_id = 4;
+TEST_F(SectionManagerTest, GetSectionMetadataInvalidSchemaTypeId) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- content, section_manager->GetSectionContent(
- conversation_document_, emails_recipients_section_id));
- EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3"));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ ASSERT_THAT(type_config_map_, SizeIs(3));
- ICING_ASSERT_OK_AND_ASSIGN(
- content, section_manager->GetSectionContent(conversation_document_,
- emails_subject_section_id));
- EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/-1, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/3, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+TEST_F(SectionManagerTest, GetSectionMetadataInvalidSectionId) {
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- content, section_manager->GetSectionContent(conversation_document_,
- emails_timestamp_section_id));
- EXPECT_THAT(content, ElementsAre("1234567890", "1234567890"));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
- EXPECT_THAT(section_manager->GetSectionContent(conversation_document_,
- name_section_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // Email (section id -> section property path):
+ // 0 -> recipientIds
+ // 1 -> recipients
+ // 2 -> subject
+ // 3 -> timestamp
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/0, /*section_id=*/4),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(section_manager->GetSectionContent(
- conversation_document_, invalid_conversation_section_id),
+ // Conversation (section id -> section property path):
+ // 0 -> emails.recipientIds
+ // 1 -> emails.recipients
+ // 2 -> emails.subject
+ // 3 -> emails.timestamp
+ // 4 -> name
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(schema_type_manager->section_manager().GetSectionMetadata(
+ /*schema_type_id=*/1, /*section_id=*/5),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SectionManagerTest, ExtractSections) {
+TEST_F(SectionManagerTest,
+ NonStringFieldsWithStringIndexingConfigDontCreateSections) {
+ // Create a schema for an empty document.
+ SchemaTypeConfigProto empty_type;
+ empty_type.set_schema_type("EmptySchema");
+
+ // Create a schema with all the non-string fields
+ SchemaTypeConfigProto type_with_non_string_properties;
+ type_with_non_string_properties.set_schema_type("Schema");
+
+ // Create an int property with a string_indexing_config
+ auto int_property = type_with_non_string_properties.add_properties();
+ int_property->set_property_name("int");
+ int_property->set_data_type(TYPE_INT64);
+ int_property->set_cardinality(CARDINALITY_REQUIRED);
+ int_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ int_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ // Create a double property with a string_indexing_config
+ auto double_property = type_with_non_string_properties.add_properties();
+ double_property->set_property_name("double");
+ double_property->set_data_type(TYPE_DOUBLE);
+ double_property->set_cardinality(CARDINALITY_REQUIRED);
+ double_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ double_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ // Create a boolean property with a string_indexing_config
+ auto boolean_property = type_with_non_string_properties.add_properties();
+ boolean_property->set_property_name("boolean");
+ boolean_property->set_data_type(TYPE_BOOLEAN);
+ boolean_property->set_cardinality(CARDINALITY_REQUIRED);
+ boolean_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ boolean_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ // Create a bytes property with a string_indexing_config
+ auto bytes_property = type_with_non_string_properties.add_properties();
+ bytes_property->set_property_name("bytes");
+ bytes_property->set_data_type(TYPE_BYTES);
+ bytes_property->set_cardinality(CARDINALITY_REQUIRED);
+ bytes_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ bytes_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ // Create a document property with a string_indexing_config
+ auto document_property = type_with_non_string_properties.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(TYPE_DOCUMENT);
+ document_property->set_schema_type(empty_type.schema_type());
+ document_property->set_cardinality(CARDINALITY_REQUIRED);
+ document_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ document_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ // Set up classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type_with_non_string_properties.schema_type(),
+ type_with_non_string_properties);
+ type_config_map.emplace(empty_type.schema_type(), empty_type);
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB, so the DynamicTrieKeyMapper gets 384KiB in total.
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/non_string_fields";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper->Put(
+ type_with_non_string_properties.schema_type(), /*schema_type_id=*/0));
+ ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
+ /*schema_type_id=*/1));
+
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
ICING_ASSERT_OK_AND_ASSIGN(
- auto section_manager,
- SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
- // Extracts all sections from 'EmailMessage' document
- ICING_ASSERT_OK_AND_ASSIGN(auto sections,
- section_manager->ExtractSections(email_document_));
- EXPECT_THAT(sections.size(), Eq(3));
+ // Create an empty document to be nested
+ DocumentProto empty_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(empty_type.schema_type())
+ .Build();
- EXPECT_THAT(sections[0].metadata.id, Eq(0));
- EXPECT_THAT(sections[0].metadata.path, Eq("recipients"));
- EXPECT_THAT(sections[0].content,
- ElementsAre("recipient1", "recipient2", "recipient3"));
+ // Create a document that follows "Schema"
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type_with_non_string_properties.schema_type())
+ .AddInt64Property("int", 1)
+ .AddDoubleProperty("double", 0.2)
+ .AddBooleanProperty("boolean", true)
+ .AddBytesProperty("bytes", "attachment bytes")
+ .AddDocumentProperty("document", empty_document)
+ .Build();
- EXPECT_THAT(sections[1].metadata.id, Eq(1));
- EXPECT_THAT(sections[1].metadata.path, Eq("subject"));
- EXPECT_THAT(sections[1].content, ElementsAre("the subject"));
+ // Extracts sections from 'Schema' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(document));
+ EXPECT_THAT(section_group.string_sections, IsEmpty());
+ EXPECT_THAT(section_group.integer_sections, IsEmpty());
+}
- EXPECT_THAT(sections[2].metadata.id, Eq(2));
- EXPECT_THAT(sections[2].metadata.path, Eq("timestamp"));
- EXPECT_THAT(sections[2].content, ElementsAre("1234567890"));
+TEST_F(SectionManagerTest,
+ NonIntegerFieldsWithIntegerIndexingConfigDontCreateSections) {
+ // Create a schema for an empty document.
+ SchemaTypeConfigProto empty_type;
+ empty_type.set_schema_type("EmptySchema");
- // Extracts all sections from 'Conversation' document
+ // Create a schema with all the non-integer fields
+ SchemaTypeConfigProto type_with_non_integer_properties;
+ type_with_non_integer_properties.set_schema_type("Schema");
+
+ // Create a string property with an integer_indexing_config
+ auto string_property = type_with_non_integer_properties.add_properties();
+ string_property->set_property_name("string");
+ string_property->set_data_type(TYPE_STRING);
+ string_property->set_cardinality(CARDINALITY_REQUIRED);
+ string_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Create a double property with an integer_indexing_config
+ auto double_property = type_with_non_integer_properties.add_properties();
+ double_property->set_property_name("double");
+ double_property->set_data_type(TYPE_DOUBLE);
+ double_property->set_cardinality(CARDINALITY_REQUIRED);
+ double_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Create a boolean property with an integer_indexing_config
+ auto boolean_property = type_with_non_integer_properties.add_properties();
+ boolean_property->set_property_name("boolean");
+ boolean_property->set_data_type(TYPE_BOOLEAN);
+ boolean_property->set_cardinality(CARDINALITY_REQUIRED);
+ boolean_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Create a bytes property with an integer_indexing_config
+ auto bytes_property = type_with_non_integer_properties.add_properties();
+ bytes_property->set_property_name("bytes");
+ bytes_property->set_data_type(TYPE_BYTES);
+ bytes_property->set_cardinality(CARDINALITY_REQUIRED);
+ bytes_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Create a document property with an integer_indexing_config
+ auto document_property = type_with_non_integer_properties.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(TYPE_DOCUMENT);
+ document_property->set_schema_type(empty_type.schema_type());
+ document_property->set_cardinality(CARDINALITY_REQUIRED);
+ document_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Set up classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type_with_non_integer_properties.schema_type(),
+ type_with_non_integer_properties);
+ type_config_map.emplace(empty_type.schema_type(), empty_type);
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB, so the DynamicTrieKeyMapper gets 384KiB in total.
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/non_integer_fields";
ICING_ASSERT_OK_AND_ASSIGN(
- sections, section_manager->ExtractSections(conversation_document_));
- EXPECT_THAT(sections.size(), Eq(3));
-
- // Section id 3 (name) not found in document, so the first section id found
- // is 1 below.
- EXPECT_THAT(sections[0].metadata.id, Eq(0));
- EXPECT_THAT(sections[0].metadata.path, Eq("emails.recipients"));
- EXPECT_THAT(sections[0].content,
- ElementsAre("recipient1", "recipient2", "recipient3",
- "recipient1", "recipient2", "recipient3"));
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper->Put(
+ type_with_non_integer_properties.schema_type(), /*schema_type_id=*/0));
+ ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
+ /*schema_type_id=*/1));
- EXPECT_THAT(sections[1].metadata.id, Eq(1));
- EXPECT_THAT(sections[1].metadata.path, Eq("emails.subject"));
- EXPECT_THAT(sections[1].content, ElementsAre("the subject", "the subject"));
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Create an empty document to be nested
+ DocumentProto empty_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(empty_type.schema_type())
+ .Build();
+
+ // Create a document that follows "Schema"
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type_with_non_integer_properties.schema_type())
+ .AddStringProperty("string", "abc")
+ .AddDoubleProperty("double", 0.2)
+ .AddBooleanProperty("boolean", true)
+ .AddBytesProperty("bytes", "attachment bytes")
+ .AddDocumentProperty("document", empty_document)
+ .Build();
- EXPECT_THAT(sections[2].metadata.id, Eq(2));
- EXPECT_THAT(sections[2].metadata.path, Eq("emails.timestamp"));
- EXPECT_THAT(sections[2].content, ElementsAre("1234567890", "1234567890"));
+ // Extracts sections from 'Schema' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(document));
+ EXPECT_THAT(section_group.string_sections, IsEmpty());
+ EXPECT_THAT(section_group.integer_sections, IsEmpty());
}
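Taken together, this test and the previous one pin down one rule: an indexing config only yields a section when it matches the property's data type. A hypothetical distillation of that rule follows; the real check lives inside SectionManager and is not shown in this diff.

// Sketch only: whether a property config can produce a section at all.
bool CanCreateSection(const PropertyConfigProto& property) {
  switch (property.data_type()) {
    case PropertyConfigProto::DataType::STRING:
      // String sections require a usable string_indexing_config.
      return property.string_indexing_config().term_match_type() !=
             TermMatchType::UNKNOWN;
    case PropertyConfigProto::DataType::INT64:
      // Integer sections require a usable integer_indexing_config.
      return property.integer_indexing_config().numeric_match_type() !=
             IntegerIndexingConfig::NumericMatchType::UNKNOWN;
    default:
      // double, boolean, bytes and document values never become string or
      // integer sections, no matter which indexing config they carry.
      return false;
  }
}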
+TEST_F(SectionManagerTest, AssignSectionsRecursivelyForDocumentFields) {
+ // Create the inner schema type that the document property refers to.
+ SchemaTypeConfigProto document_type;
+ document_type.set_schema_type("DocumentSchema");
+
+ auto string_property = document_type.add_properties();
+ string_property->set_property_name("string");
+ string_property->set_data_type(TYPE_STRING);
+ string_property->set_cardinality(CARDINALITY_REQUIRED);
+ string_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ string_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ auto integer_property = document_type.add_properties();
+ integer_property->set_property_name("integer");
+ integer_property->set_data_type(TYPE_INT64);
+ integer_property->set_cardinality(CARDINALITY_REQUIRED);
+ integer_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Create the outer schema which has the document property.
+ SchemaTypeConfigProto type;
+ type.set_schema_type("Schema");
+
+ auto document_property = type.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(TYPE_DOCUMENT);
+ document_property->set_schema_type(document_type.schema_type());
+ document_property->set_cardinality(CARDINALITY_REQUIRED);
+
+ // Opt into recursing into the document fields.
+ document_property->mutable_document_indexing_config()
+ ->set_index_nested_properties(true);
+
+ // Create the inner document.
+ DocumentProto inner_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(document_type.schema_type())
+ .AddStringProperty("string", "foo")
+ .AddInt64Property("integer", 123)
+ .Build();
+
+ // Create the outer document that holds the inner document
+ DocumentProto outer_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type.schema_type())
+ .AddDocumentProperty("document", inner_document)
+ .Build();
+
+ // Set up classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type.schema_type(), type);
+ type_config_map.emplace(document_type.schema_type(), document_type);
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB, so the DynamicTrieKeyMapper gets 384KiB in total.
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/recurse_into_document";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
+ int type_schema_type_id = 0;
+ int document_type_schema_type_id = 1;
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(type.schema_type(), type_schema_type_id));
+ ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
+ document_type_schema_type_id));
+
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Extracts sections from 'Schema' document; there should be 1 string
+ // section and 1 integer section from the nested document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(outer_document));
+ EXPECT_THAT(section_group.string_sections, SizeIs(1));
+ EXPECT_THAT(section_group.integer_sections, SizeIs(1));
+}
+
+TEST_F(SectionManagerTest, DontAssignSectionsRecursivelyForDocumentFields) {
+ // Create the inner schema type that the document property refers to.
+ SchemaTypeConfigProto document_type;
+ document_type.set_schema_type("DocumentSchema");
+
+ auto string_property = document_type.add_properties();
+ string_property->set_property_name("string");
+ string_property->set_data_type(TYPE_STRING);
+ string_property->set_cardinality(CARDINALITY_REQUIRED);
+ string_property->mutable_string_indexing_config()->set_term_match_type(
+ TERM_MATCH_EXACT);
+ string_property->mutable_string_indexing_config()->set_tokenizer_type(
+ TOKENIZER_PLAIN);
+
+ auto integer_property = document_type.add_properties();
+ integer_property->set_property_name("integer");
+ integer_property->set_data_type(TYPE_INT64);
+ integer_property->set_cardinality(CARDINALITY_REQUIRED);
+ integer_property->mutable_integer_indexing_config()->set_numeric_match_type(
+ NUMERIC_MATCH_RANGE);
+
+ // Create the outer schema which has the document property.
+ SchemaTypeConfigProto type;
+ type.set_schema_type("Schema");
+
+ auto document_property = type.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(TYPE_DOCUMENT);
+ document_property->set_schema_type(document_type.schema_type());
+ document_property->set_cardinality(CARDINALITY_REQUIRED);
+
+ // Opt out of recursing into the document fields.
+ document_property->mutable_document_indexing_config()
+ ->set_index_nested_properties(false);
+
+ // Create the inner document.
+ DocumentProto inner_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(document_type.schema_type())
+ .AddStringProperty("string", "foo")
+ .AddInt64Property("integer", 123)
+ .Build();
+
+ // Create the outer document that holds the inner document
+ DocumentProto outer_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type.schema_type())
+ .AddDocumentProperty("document", inner_document)
+ .Build();
+
+ // Set up classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type.schema_type(), type);
+ type_config_map.emplace(document_type.schema_type(), document_type);
+
+ // DynamicTrieKeyMapper uses 3 internal arrays for bookkeeping. Give each one
+ // 128KiB, so the DynamicTrieKeyMapper gets 384KiB in total.
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/recurse_into_document";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, dir,
+ key_mapper_size));
+ int type_schema_type_id = 0;
+ int document_type_schema_type_id = 1;
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(type.schema_type(), type_schema_type_id));
+ ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
+ document_type_schema_type_id));
+
+ // Use SchemaTypeManager factory method to instantiate SectionManager.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Extracts sections from 'Schema' document; there won't be any, since we
+ // didn't recurse into the nested document to see its inner properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SectionGroup section_group,
+ schema_type_manager->section_manager().ExtractSections(outer_document));
+ EXPECT_THAT(section_group.string_sections, IsEmpty());
+ EXPECT_THAT(section_group.integer_sections, IsEmpty());
+}
+
+} // namespace
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/section.h b/icing/schema/section.h
index daf4fd0..3685a29 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -17,6 +17,7 @@
#include <cstdint>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -27,25 +28,30 @@ namespace icing {
namespace lib {
using SectionId = int8_t;
-// 4 bits for 16 values. NOTE: Increasing this value means that SectionIdMask
-// must increase from an int16_t to an int32_t
-inline constexpr int kSectionIdBits = 4;
-inline constexpr SectionId kInvalidSectionId = (1 << kSectionIdBits);
-inline constexpr SectionId kMaxSectionId = kInvalidSectionId - 1;
+// 6 bits for 64 values.
+inline constexpr int kSectionIdBits = 6;
+inline constexpr SectionId kTotalNumSections = (1 << kSectionIdBits);
+inline constexpr SectionId kInvalidSectionId = kTotalNumSections;
+inline constexpr SectionId kMaxSectionId = kTotalNumSections - 1;
+// Prior versions of Icing only supported 16 indexed properties.
+inline constexpr SectionId kOldTotalNumSections = 16;
inline constexpr SectionId kMinSectionId = 0;
constexpr bool IsSectionIdValid(SectionId section_id) {
return section_id >= kMinSectionId && section_id <= kMaxSectionId;
}
-using SectionIdMask = int16_t;
+using SectionIdMask = int64_t;
inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
+static_assert(kSectionIdBits < 8 * sizeof(SectionId),
+ "Cannot exhaust all bits of SectionId since it is a signed "
+ "integer and the most significant bit should be preserved.");
+
static_assert(
kMaxSectionId < 8 * sizeof(SectionIdMask),
"SectionIdMask is not large enough to represent all section values!");
-// TODO(samzheng): add more metadata when needed, e.g. tokenizer type,
struct SectionMetadata {
// Dot-joined property names, representing the location of the section inside
// a document. E.g. "property1.property2"
@@ -54,11 +60,14 @@ struct SectionMetadata {
// A unique id of the property within a type config
SectionId id;
- // How content in this section should be tokenized. It is invalid for a
- // section to have tokenizer == 'NONE'.
- IndexingConfig::TokenizerType::Code tokenizer;
+ // Indexable data type of this section. E.g. STRING, INT64.
+ PropertyConfigProto::DataType::Code data_type;
+
+ // How strings should be tokenized. It is invalid for a string section
+ // (data_type == 'STRING') to have tokenizer == 'NONE'.
+ StringIndexingConfig::TokenizerType::Code tokenizer;
- // How tokens in this section should be matched.
+ // How tokens in a string section should be matched.
//
// TermMatchType::UNKNOWN:
// Terms will not match anything
@@ -70,24 +79,71 @@ struct SectionMetadata {
// Terms will be only stored as an exact match, "fool" only matches "fool"
TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
- SectionMetadata(SectionId id_in, TermMatchType::Code term_match_type_in,
- IndexingConfig::TokenizerType::Code tokenizer,
- std::string&& path_in)
+ // How tokens in a numeric section should be matched.
+ //
+ // NumericMatchType::UNKNOWN:
+ // Contents will not match anything. It is invalid for a numeric section
+ // (data_type == 'INT64') to have numeric_match_type == 'UNKNOWN'.
+ //
+ // NumericMatchType::RANGE:
+ // Contents will be matched by a range query.
+ IntegerIndexingConfig::NumericMatchType::Code numeric_match_type;
+
+ explicit SectionMetadata(
+ SectionId id_in, PropertyConfigProto::DataType::Code data_type_in,
+ StringIndexingConfig::TokenizerType::Code tokenizer,
+ TermMatchType::Code term_match_type_in,
+ IntegerIndexingConfig::NumericMatchType::Code numeric_match_type_in,
+ std::string&& path_in)
: path(std::move(path_in)),
id(id_in),
+ data_type(data_type_in),
tokenizer(tokenizer),
- term_match_type(term_match_type_in) {}
+ term_match_type(term_match_type_in),
+ numeric_match_type(numeric_match_type_in) {}
+
+ SectionMetadata(const SectionMetadata& other) = default;
+ SectionMetadata& operator=(const SectionMetadata& other) = default;
+
+ SectionMetadata(SectionMetadata&& other) = default;
+ SectionMetadata& operator=(SectionMetadata&& other) = default;
+
+ bool operator==(const SectionMetadata& rhs) const {
+ return path == rhs.path && id == rhs.id && data_type == rhs.data_type &&
+ tokenizer == rhs.tokenizer &&
+ term_match_type == rhs.term_match_type &&
+ numeric_match_type == rhs.numeric_match_type;
+ }
};
// Section is an icing internal concept similar to document property but with
// extra metadata. The content can be a value or the combination of repeated
-// values of a property.
+// values of a property, and the type of the content is specified by the
+// template parameter.
+//
+// Currently supported types:
+// - std::string_view (PropertyConfigProto::DataType::STRING)
+// - int64_t (PropertyConfigProto::DataType::INT64)
+template <typename T>
struct Section {
SectionMetadata metadata;
- std::vector<std::string> content;
+ std::vector<T> content;
- Section(SectionMetadata&& metadata_in, std::vector<std::string>&& content_in)
+ explicit Section(SectionMetadata&& metadata_in, std::vector<T>&& content_in)
: metadata(std::move(metadata_in)), content(std::move(content_in)) {}
+
+ PropertyConfigProto::DataType::Code data_type() const {
+ return metadata.data_type;
+ }
+};
+
+// Groups sections by content type, so that callers can access only the
+// section types they want and skip the others.
+//
+// REQUIRES: the lifecycle of the underlying property must be longer than this
+// object, since we use std::string_view to extract its string_values.
+struct SectionGroup {
+ std::vector<Section<std::string_view>> string_sections;
+ std::vector<Section<int64_t>> integer_sections;
};
} // namespace lib
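To make the new limits concrete: with kSectionIdBits = 6 a section id can reach 63, and one bit per section no longer fits in the old int16_t mask (which capped Icing at kOldTotalNumSections = 16 indexed properties), hence the move to an int64_t SectionIdMask. The following is an illustrative sketch that assumes only the declarations above; it is not part of the patch.

#include <cstdint>
#include <vector>

// Sketch only: build a SectionIdMask from a set of section ids and query it.
SectionIdMask BuildMask(const std::vector<SectionId>& ids) {
  SectionIdMask mask = kSectionIdMaskNone;
  for (SectionId id : ids) {
    // Shift in unsigned space so that id == kMaxSectionId (63) stays
    // well-defined, then store the result in the signed mask type.
    mask |= static_cast<SectionIdMask>(uint64_t{1} << id);
  }
  return mask;
}

bool MaskContains(SectionIdMask mask, SectionId id) {
  return (mask & static_cast<SectionIdMask>(uint64_t{1} << id)) != 0;
}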
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.cc b/icing/scoring/advanced_scoring/advanced-scorer.cc
new file mode 100644
index 0000000..83c1519
--- /dev/null
+++ b/icing/scoring/advanced_scoring/advanced-scorer.cc
@@ -0,0 +1,68 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/advanced_scoring/advanced-scorer.h"
+
+#include <memory>
+
+#include "icing/query/advanced_query_parser/lexer.h"
+#include "icing/query/advanced_query_parser/parser.h"
+#include "icing/scoring/advanced_scoring/score-expression.h"
+#include "icing/scoring/advanced_scoring/scoring-visitor.h"
+#include "icing/scoring/bm25f-calculator.h"
+#include "icing/scoring/section-weights.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>>
+AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
+ double default_score,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store, int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher) {
+ ICING_RETURN_ERROR_IF_NULL(document_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
+
+ Lexer lexer(scoring_spec.advanced_scoring_expression(),
+ Lexer::Language::SCORING);
+ ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
+ lexer.ExtractTokens());
+ Parser parser = Parser::Create(std::move(lexer_tokens));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
+ parser.ConsumeScoring());
+
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store, scoring_spec));
+ std::unique_ptr<Bm25fCalculator> bm25f_calculator =
+ std::make_unique<Bm25fCalculator>(document_store, section_weights.get(),
+ current_time_ms);
+ ScoringVisitor visitor(default_score, document_store, schema_store,
+ section_weights.get(), bm25f_calculator.get(),
+ join_children_fetcher, current_time_ms);
+ tree_root->Accept(&visitor);
+
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<ScoreExpression> expression,
+ std::move(visitor).Expression());
+ if (expression->type() != ScoreExpressionType::kDouble) {
+ return absl_ports::InvalidArgumentError(
+ "The root scoring expression is not of double type.");
+ }
+ return std::unique_ptr<AdvancedScorer>(
+ new AdvancedScorer(std::move(expression), std::move(section_weights),
+ std::move(bm25f_calculator), default_score));
+}
+
+} // namespace lib
+} // namespace icing
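From the caller's side, the pipeline above (lex, parse, visit, then per-document evaluation) reduces to a Create-then-GetScore flow. This is a hedged sketch: the stores and the clock value are placeholders assumed to be set up elsewhere, and 'this.documentScore() + 1' is just a placeholder expression; full examples appear in advanced-scorer_test.cc further below.

// Sketch only; document_store, schema_store and current_time_ms are
// placeholders for state created elsewhere.
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
    ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
scoring_spec.set_advanced_scoring_expression("this.documentScore() + 1");

libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>> scorer_or =
    AdvancedScorer::Create(scoring_spec, /*default_score=*/0, document_store,
                           schema_store, current_time_ms);
if (scorer_or.ok()) {
  DocHitInfo hit_info(/*document_id_in=*/0);
  // Evaluation errors are logged and fall back to the default score.
  double score = scorer_or.ValueOrDie()->GetScore(hit_info,
                                                  /*query_it=*/nullptr);
}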
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.h b/icing/scoring/advanced_scoring/advanced-scorer.h
new file mode 100644
index 0000000..d69abad
--- /dev/null
+++ b/icing/scoring/advanced_scoring/advanced-scorer.h
@@ -0,0 +1,92 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_ADVANCED_SCORING_ADVANCED_SCORER_H_
+#define ICING_SCORING_ADVANCED_SCORING_ADVANCED_SCORER_H_
+
+#include <memory>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/advanced_scoring/score-expression.h"
+#include "icing/scoring/bm25f-calculator.h"
+#include "icing/scoring/scorer.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+class AdvancedScorer : public Scorer {
+ public:
+ // Returns:
+ // An AdvancedScorer instance on success
+ // FAILED_PRECONDITION on any null pointer input
+ // INVALID_ARGUMENT if an instance cannot be created
+ static libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>> Create(
+ const ScoringSpecProto& scoring_spec, double default_score,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
+
+ double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) override {
+ libtextclassifier3::StatusOr<double> result =
+ score_expression_->eval(hit_info, query_it);
+ if (!result.ok()) {
+ ICING_LOG(ERROR) << "Got an error when scoring a document:\n"
+ << result.status().error_message();
+ return default_score_;
+ }
+ return std::move(result).ValueOrDie();
+ }
+
+ void PrepareToScore(
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators) override {
+ if (query_term_iterators == nullptr || query_term_iterators->empty()) {
+ return;
+ }
+ bm25f_calculator_->PrepareToScore(query_term_iterators);
+ }
+
+ bool is_constant() const { return score_expression_->is_constant_double(); }
+
+ private:
+ explicit AdvancedScorer(std::unique_ptr<ScoreExpression> score_expression,
+ std::unique_ptr<SectionWeights> section_weights,
+ std::unique_ptr<Bm25fCalculator> bm25f_calculator,
+ double default_score)
+ : score_expression_(std::move(score_expression)),
+ section_weights_(std::move(section_weights)),
+ bm25f_calculator_(std::move(bm25f_calculator)),
+ default_score_(default_score) {
+ if (is_constant()) {
+ ICING_LOG(WARNING)
+ << "The advanced scoring expression will evaluate to a constant.";
+ }
+ }
+
+ std::unique_ptr<ScoreExpression> score_expression_;
+ std::unique_ptr<SectionWeights> section_weights_;
+ std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
+ double default_score_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_ADVANCED_SCORING_ADVANCED_SCORER_H_
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
new file mode 100644
index 0000000..3612359
--- /dev/null
+++ b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
@@ -0,0 +1,70 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string_view>
+
+#include "icing/scoring/advanced_scoring/advanced-scorer.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ FakeClock fake_clock;
+ Filesystem filesystem;
+ const std::string test_dir = GetTestTempDir() + "/icing";
+ const std::string doc_store_dir = test_dir + "/doc_store";
+ const std::string schema_store_dir = test_dir + "/schema_store";
+ filesystem.DeleteDirectoryRecursively(test_dir.c_str());
+ filesystem.CreateDirectoryRecursively(doc_store_dir.c_str());
+ filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+ std::unique_ptr<SchemaStore> schema_store =
+ SchemaStore::Create(&filesystem, schema_store_dir, &fake_clock)
+ .ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ DocumentStore::Create(
+ &filesystem, doc_store_dir, &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr)
+ .ValueOrDie()
+ .document_store;
+
+ std::string_view text(reinterpret_cast<const char*>(data), size);
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ scoring_spec.set_advanced_scoring_expression(text);
+
+ AdvancedScorer::Create(scoring_spec,
+ /*default_score=*/10, document_store.get(),
+ schema_store.get(),
+ fake_clock.GetSystemTimeMilliseconds());
+
+ // We are not able to test the GetScore method of AdvancedScorer, since it is
+ // only available after AdvancedScorer is successfully created, and the text
+ // provided by the fuzz test is essentially random: in most cases it will
+ // contain syntax or type errors that cause AdvancedScorer::Create to fail.
+ return 0;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
new file mode 100644
index 0000000..cc1d413
--- /dev/null
+++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
@@ -0,0 +1,1039 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/advanced_scoring/advanced-scorer.h"
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/scoring/scorer-factory.h"
+#include "icing/scoring/scorer.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::DoubleNear;
+using ::testing::Eq;
+
+class AdvancedScorerTest : public testing::Test {
+ protected:
+ AdvancedScorerTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ doc_store_dir_(test_dir_ + "/doc_store"),
+ schema_store_dir_(test_dir_ + "/schema_store") {}
+
+ void SetUp() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
+
+ // Creates a simple email schema
+ SchemaProto test_email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const std::string test_dir_;
+ const std::string doc_store_dir_;
+ const std::string schema_store_dir_;
+ Filesystem filesystem_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+};
+
+constexpr double kEps = 0.0000000001;
+constexpr int kDefaultScore = 0;
+constexpr int64_t kDefaultCreationTimestampMs = 1571100001111;
+
+DocumentProto CreateDocument(
+ const std::string& name_space, const std::string& uri,
+ int score = kDefaultScore,
+ int64_t creation_timestamp_ms = kDefaultCreationTimestampMs) {
+ return DocumentBuilder()
+ .SetKey(name_space, uri)
+ .SetSchema("email")
+ .SetScore(score)
+ .SetCreationTimestampMs(creation_timestamp_ms)
+ .Build();
+}
+
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+ScoringSpecProto CreateAdvancedScoringSpec(
+ const std::string& advanced_scoring_expression) {
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ scoring_spec.set_advanced_scoring_expression(advanced_scoring_expression);
+ return scoring_spec;
+}
+
+PropertyWeight CreatePropertyWeight(std::string path, double weight) {
+ PropertyWeight property_weight;
+ property_weight.set_path(std::move(path));
+ property_weight.set_weight(weight);
+ return property_weight;
+}
+
+TypePropertyWeights CreateTypePropertyWeights(
+ std::string schema_type, std::vector<PropertyWeight>&& property_weights) {
+ TypePropertyWeights type_property_weights;
+ type_property_weights.set_schema_type(std::move(schema_type));
+ type_property_weights.mutable_property_weights()->Reserve(
+ property_weights.size());
+
+ for (PropertyWeight& property_weight : property_weights) {
+ *type_property_weights.add_property_weights() = std::move(property_weight);
+ }
+
+ return type_property_weights;
+}
+
+TEST_F(AdvancedScorerTest, InvalidAdvancedScoringSpec) {
+ // Empty scoring expression for advanced scoring
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ EXPECT_THAT(scorer_factory::Create(scoring_spec, /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Non-empty scoring expression for normal scoring
+ scoring_spec = ScoringSpecProto::default_instance();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ scoring_spec.set_advanced_scoring_expression("1");
+ EXPECT_THAT(scorer_factory::Create(scoring_spec, /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(AdvancedScorerTest, SimpleExpression) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri")));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("123"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(123));
+}
+
+TEST_F(AdvancedScorerTest, BasicPureArithmeticExpression) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri")));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + 2"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(3));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("-1 + 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + -2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(-1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 - 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(-1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 * 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(2));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("1 / 2"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0.5));
+}
+
+TEST_F(AdvancedScorerTest, BasicMathFunctionExpression) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri")));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("log(10, 1000)"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(3, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("log(2.718281828459045)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(1, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(2, 10)"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(1024));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("max(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(14));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("min(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sum(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10 + 11 + 12 + 13 + 14));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("avg(10, 11, 12, 13, 14)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq((10 + 11 + 12 + 13 + 14) / 5.));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(2)"),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(sqrt(2), kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("abs(-2) + abs(2)"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(4));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sin(3.141592653589793)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(0, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("cos(3.141592653589793)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(-1, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("tan(3.141592653589793 / 4)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(1, kEps));
+}
+
+TEST_F(AdvancedScorerTest, DocumentScoreCreationTimestampFunctionExpression) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument(
+ "namespace", "uri", /*score=*/123,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs)));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("this.documentScore()"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(123));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.creationTimestamp()"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(kDefaultCreationTimestampMs));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec(
+ "this.documentScore() + this.creationTimestamp()"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo),
+ Eq(123 + kDefaultCreationTimestampMs));
+}
+
+TEST_F(AdvancedScorerTest, DocumentUsageFunctionExpression) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri")));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageCount(1) + this.usageCount(2) "
+ "+ this.usageLastUsedTimestamp(3)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0));
+ ICING_ASSERT_OK(document_store_->ReportUsage(
+ CreateUsageReport("namespace", "uri", 100000, UsageReport::USAGE_TYPE1)));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(1));
+ ICING_ASSERT_OK(document_store_->ReportUsage(
+ CreateUsageReport("namespace", "uri", 200000, UsageReport::USAGE_TYPE2)));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(2));
+ ICING_ASSERT_OK(document_store_->ReportUsage(
+ CreateUsageReport("namespace", "uri", 300000, UsageReport::USAGE_TYPE3)));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(300002));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageLastUsedTimestamp(1)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(100000));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageLastUsedTimestamp(2)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(200000));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageLastUsedTimestamp(3)"),
+ /*default_score=*/10, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(300000));
+}
+
+TEST_F(AdvancedScorerTest, DocumentUsageFunctionOutOfRange) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri")));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ const double default_score = 123;
+
+  // The default score should be returned for the following expressions, which
+  // all cause "runtime" errors.
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("this.usageCount(4)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(default_score));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageCount(0)"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(default_score));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageCount(1.5)"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(default_score));
+}
+
+// scoring-processor_test.cc provides more thorough test coverage for the
+// relevance score.
+TEST_F(AdvancedScorerTest, RelevanceScoreFunctionScoreExpression) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetScore(5)
+ .SetKey("namespace", "uri")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(test_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("this.relevanceScore()"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ scorer->PrepareToScore(/*query_term_iterators=*/{});
+
+ // Should get the default score.
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer->GetScore(docHitInfo, /*query_it=*/nullptr), Eq(10));
+}
+
+TEST_F(AdvancedScorerTest, ChildrenScoresFunctionScoreExpression) {
+ const double default_score = 123;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_1,
+ document_store_->Put(CreateDocument("namespace", "uri1")));
+ DocHitInfo docHitInfo1 = DocHitInfo(document_id_1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_2,
+ document_store_->Put(CreateDocument("namespace", "uri2")));
+ DocHitInfo docHitInfo2 = DocHitInfo(document_id_2);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id_3,
+ document_store_->Put(CreateDocument("namespace", "uri3")));
+ DocHitInfo docHitInfo3 = DocHitInfo(document_id_3);
+
+ // Create a JoinChildrenFetcher that matches:
+ // document_id_1 to fake_child1 with score 1 and fake_child2 with score 2.
+ // document_id_2 to fake_child3 with score 4.
+  // document_id_3 has no children.
+ JoinSpecProto join_spec;
+ join_spec.set_parent_property_expression("this.qualifiedId()");
+ join_spec.set_child_property_expression("sender");
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id;
+ ScoredDocumentHit fake_child1(/*document_id=*/10, kSectionIdMaskNone,
+ /*score=*/1.0);
+ ScoredDocumentHit fake_child2(/*document_id=*/11, kSectionIdMaskNone,
+ /*score=*/2.0);
+ ScoredDocumentHit fake_child3(/*document_id=*/12, kSectionIdMaskNone,
+ /*score=*/4.0);
+ map_joinable_qualified_id[document_id_1].push_back(fake_child1);
+ map_joinable_qualified_id[document_id_1].push_back(fake_child2);
+ map_joinable_qualified_id[document_id_2].push_back(fake_child3);
+ JoinChildrenFetcher fetcher(join_spec, std::move(map_joinable_qualified_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(2));
+ // document_id_2 has one child.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(1));
+  // document_id_3 has no children.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("sum(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4));
+  // document_id_3 has no children.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr), Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("avg(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3 / 2.));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4 / 1.));
+  // document_id_3 has no children.
+ // This is an evaluation error, so default_score will be returned.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr),
+ Eq(default_score));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(
+ CreateAdvancedScoringSpec(
+ // Equivalent to "avg(this.childrenRankingSignals())"
+ "sum(this.childrenRankingSignals()) / "
+ "len(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fetcher));
+ // document_id_1 has two children with scores 1 and 2.
+ EXPECT_THAT(scorer->GetScore(docHitInfo1, /*query_it=*/nullptr), Eq(3 / 2.));
+ // document_id_2 has one child with score 4.
+ EXPECT_THAT(scorer->GetScore(docHitInfo2, /*query_it=*/nullptr), Eq(4 / 1.));
+  // document_id_3 has no children.
+ // This is an evaluation error, so default_score will be returned.
+ EXPECT_THAT(scorer->GetScore(docHitInfo3, /*query_it=*/nullptr),
+ Eq(default_score));
+}
+
+TEST_F(AdvancedScorerTest, PropertyWeightsFunctionScoreExpression) {
+ DocumentProto test_document_1 =
+ DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build();
+ DocumentProto test_document_2 =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build();
+ DocumentProto test_document_3 =
+ DocumentBuilder().SetKey("namespace", "uri3").SetSchema("person").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1,
+ document_store_->Put(test_document_1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2,
+ document_store_->Put(test_document_2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_3,
+ document_store_->Put(test_document_3));
+
+ ScoringSpecProto spec_proto = CreateAdvancedScoringSpec("");
+
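+  // Weights are normalized against the maximum weight within each type (see
+  // the UnspecifiedWeights test below). The maximum here is 1.0 for both
+  // types, so the weights above are already the normalized values.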
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"email",
+ {CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0)});
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"person",
+ {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/0.5),
+ CreatePropertyWeight(/*path=*/"name", /*weight=*/0.8),
+ CreatePropertyWeight(/*path=*/"phoneNumber", /*weight=*/1.0)});
+
+ // Let the hit for test_document_1 match property "subject".
+ // So this.propertyWeights() for test_document_1 will return [1].
+ DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1);
+ doc_hit_info_1.UpdateSection(0);
+
+ // Let the hit for test_document_2 match properties "emailAddress" and "name".
+ // So this.propertyWeights() for test_document_2 will return [0.5, 0.8].
+ DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2);
+ doc_hit_info_2.UpdateSection(0);
+ doc_hit_info_2.UpdateSection(1);
+
+ // Let the hit for test_document_3 match properties "emailAddress", "name" and
+ // "phoneNumber". So this.propertyWeights() for test_document_3 will return
+ // [0.5, 0.8, 1].
+ DocHitInfo doc_hit_info_3 = DocHitInfo(document_id_3);
+ doc_hit_info_3.UpdateSection(0);
+ doc_hit_info_3.UpdateSection(1);
+ doc_hit_info_3.UpdateSection(2);
+
+ spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // min([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // min([0.5, 0.8]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5));
+ // min([0.5, 0.8, 1.0]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(0.5));
+
+ spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // max([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // max([0.5, 0.8]) = 0.8
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.8));
+ // max([0.5, 0.8, 1.0]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(1));
+
+ spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // sum([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // sum([0.5, 0.8]) = 1.3
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1.3));
+ // sum([0.5, 0.8, 1.0]) = 2.3
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_3, /*query_it=*/nullptr), Eq(2.3));
+}
+
+TEST_F(AdvancedScorerTest,
+ PropertyWeightsFunctionScoreExpressionUnspecifiedWeights) {
+ DocumentProto test_document_1 =
+ DocumentBuilder().SetKey("namespace", "uri1").SetSchema("email").Build();
+ DocumentProto test_document_2 =
+ DocumentBuilder().SetKey("namespace", "uri2").SetSchema("person").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_1,
+ document_store_->Put(test_document_1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id_2,
+ document_store_->Put(test_document_2));
+
+ ScoringSpecProto spec_proto = CreateAdvancedScoringSpec("");
+
+  // The entry for type "email" is missing, so every property in "email"
+  // should get weight 1.0.
+  // The weight of "phoneNumber" in the "person" type is unspecified, so it
+  // defaults to 1.0 and is normalized against the maximum weight in the type
+  // (2), giving 1/2 = 0.5.
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"person",
+ {CreatePropertyWeight(/*path=*/"emailAddress", /*weight=*/1.0),
+ CreatePropertyWeight(/*path=*/"name", /*weight=*/2)});
+
+ // Let the hit for test_document_1 match property "subject".
+ // So this.propertyWeights() for test_document_1 will return [1].
+ DocHitInfo doc_hit_info_1 = DocHitInfo(document_id_1);
+ doc_hit_info_1.UpdateSection(0);
+
+  // Let the hit for test_document_2 match properties "emailAddress", "name"
+  // and "phoneNumber". So this.propertyWeights() for test_document_2 will
+  // return [0.5, 1, 0.5].
+ DocHitInfo doc_hit_info_2 = DocHitInfo(document_id_2);
+ doc_hit_info_2.UpdateSection(0);
+ doc_hit_info_2.UpdateSection(1);
+ doc_hit_info_2.UpdateSection(2);
+
+ spec_proto.set_advanced_scoring_expression("min(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // min([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // min([0.5, 1, 0.5]) = 0.5
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(0.5));
+
+ spec_proto.set_advanced_scoring_expression("max(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // max([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // max([0.5, 1, 0.5]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(1));
+
+ spec_proto.set_advanced_scoring_expression("sum(this.propertyWeights())");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(spec_proto,
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // sum([1]) = 1
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_1, /*query_it=*/nullptr), Eq(1));
+ // sum([0.5, 1, 0.5]) = 2
+ EXPECT_THAT(scorer->GetScore(doc_hit_info_2, /*query_it=*/nullptr), Eq(2));
+}
+
+TEST_F(AdvancedScorerTest, InvalidChildrenScoresFunctionScoreExpression) {
+ const double default_score = 123;
+
+ // Without join_children_fetcher provided,
+ // "len(this.childrenRankingSignals())" cannot be created.
+ EXPECT_THAT(
+ AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("len(this.childrenRankingSignals())"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(),
+ /*join_children_fetcher=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // The root expression can only be of double type, but here it is of list
+ // type.
+ JoinChildrenFetcher fake_fetcher(JoinSpecProto::default_instance(),
+ /*map_joinable_qualified_id=*/{});
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.childrenRankingSignals()"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds(), &fake_fetcher),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(AdvancedScorerTest, ComplexExpression) {
+ const int64_t creation_timestamp_ms = 123;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri", /*score=*/123,
+ creation_timestamp_ms)));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec(
+ "pow(sin(2), 2)"
+          // Equivalent to this.usageCount(1), since documentScore() is 123.
+ "+ this.usageCount(this.documentScore() - 122)"
+ "/ 12.34"
+ "* (10 * pow(2 * 1, sin(2))"
+ "+ 10 * (2 + 10 + this.creationTimestamp()))"
+ // This should evaluate to default score.
+ "+ this.relevanceScore()"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_FALSE(scorer->is_constant());
+ scorer->PrepareToScore(/*query_term_iterators=*/{});
+
+ ICING_ASSERT_OK(document_store_->ReportUsage(
+ CreateUsageReport("namespace", "uri", 0, UsageReport::USAGE_TYPE1)));
+ ICING_ASSERT_OK(document_store_->ReportUsage(
+ CreateUsageReport("namespace", "uri", 0, UsageReport::USAGE_TYPE1)));
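+  // After the two USAGE_TYPE1 reports above, this.usageCount(1) evaluates to
+  // 2, which is the "2" in the expected score below.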
+ EXPECT_THAT(scorer->GetScore(docHitInfo, /*query_it=*/nullptr),
+ DoubleNear(pow(sin(2), 2) +
+ 2 / 12.34 *
+ (10 * pow(2 * 1, sin(2)) +
+ 10 * (2 + 10 + creation_timestamp_ms)) +
+ 10,
+ kEps));
+}
+
+TEST_F(AdvancedScorerTest, ConstantExpression) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<AdvancedScorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec(
+ "pow(sin(2), 2)"
+ "+ log(2, 122) / 12.34"
+ "* (10 * pow(2 * 1, sin(2)) + 10 * (2 + 10))"),
+ /*default_score=*/10, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_TRUE(scorer->is_constant());
+}
+
+// An empty expression should be a parsing error.
+TEST_F(AdvancedScorerTest, EmptyExpression) {
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec(""),
+ /*default_score=*/10,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(AdvancedScorerTest, EvaluationErrorShouldReturnDefaultScore) {
+ const double default_score = 123;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(CreateDocument("namespace", "uri")));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("log(0)"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer,
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("1 / 0"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(-1)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ scorer, AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(-1, 0.5)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), DoubleNear(default_score, kEps));
+}
+
+// The following tests should trigger a type error while the visitor tries to
+// build a ScoreExpression object.
+TEST_F(AdvancedScorerTest, MathTypeError) {
+ const double default_score = 0;
+
+ EXPECT_THAT(
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("test"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("log()"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("log(1, 2, 3)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("log(1, this)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("pow(1)"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("sqrt(1, 2)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("abs(1, 2)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("sin(1, 2)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("cos(1, 2)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("tan(1, 2)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("this"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(
+ AdvancedScorer::Create(CreateAdvancedScoringSpec("-this"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("1 + this"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(AdvancedScorerTest, DocumentFunctionTypeError) {
+ const double default_score = 0;
+
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("documentScore(1)"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.creationTimestamp(1)"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.usageCount()"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("usageLastUsedTimestamp(1, 1)"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("relevanceScore(1)"), default_score,
+ document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("documentScore(this)"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("that.documentScore()"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(
+ CreateAdvancedScoringSpec("this.this.creationTimestamp()"),
+ default_score, document_store_.get(), schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(AdvancedScorer::Create(CreateAdvancedScoringSpec("this.log(2)"),
+ default_score, document_store_.get(),
+ schema_store_.get(),
+ fake_clock_.GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc
new file mode 100644
index 0000000..e8a2a89
--- /dev/null
+++ b/icing/scoring/advanced_scoring/score-expression.cc
@@ -0,0 +1,521 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/advanced_scoring/score-expression.h"
+
+#include <numeric>
+#include <vector>
+
+#include "icing/absl_ports/canonical_errors.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+libtextclassifier3::Status CheckChildrenNotNull(
+ const std::vector<std::unique_ptr<ScoreExpression>>& children) {
+ for (const auto& child : children) {
+ ICING_RETURN_ERROR_IF_NULL(child);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace
+
+libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
+OperatorScoreExpression::Create(
+ OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children) {
+ if (children.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "OperatorScoreExpression must have at least one argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+
+ bool children_all_constant_double = true;
+ for (const auto& child : children) {
+ if (child->type() != ScoreExpressionType::kDouble) {
+ return absl_ports::InvalidArgumentError(
+ "Operators are only supported for double type.");
+ }
+ if (!child->is_constant_double()) {
+ children_all_constant_double = false;
+ }
+ }
+ if (op == OperatorType::kNegative) {
+ if (children.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "Negative operator must have only 1 argument.");
+ }
+ }
+ std::unique_ptr<ScoreExpression> expression =
+ std::unique_ptr<OperatorScoreExpression>(
+ new OperatorScoreExpression(op, std::move(children)));
+ if (children_all_constant_double) {
+    // Because all of the children are constants, this expression does not
+    // depend on the DocHitInfo or query_it that are passed into it.
+ return ConstantScoreExpression::Create(
+ expression->eval(DocHitInfo(), /*query_it=*/nullptr));
+ }
+ return expression;
+}
+
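+// Evaluates the children left to right, folding their results with the
+// operator. A division by zero yields an infinity, which fails the
+// std::isfinite check below and surfaces as an error; the scorer then falls
+// back to the default score.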
+libtextclassifier3::StatusOr<double> OperatorScoreExpression::eval(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ // The Create factory guarantees that an operator will have at least one
+ // child.
+ ICING_ASSIGN_OR_RETURN(double res, children_.at(0)->eval(hit_info, query_it));
+
+ if (op_ == OperatorType::kNegative) {
+ return -res;
+ }
+
+ for (int i = 1; i < children_.size(); ++i) {
+ ICING_ASSIGN_OR_RETURN(double v, children_.at(i)->eval(hit_info, query_it));
+ switch (op_) {
+ case OperatorType::kPlus:
+ res += v;
+ break;
+ case OperatorType::kMinus:
+ res -= v;
+ break;
+ case OperatorType::kTimes:
+ res *= v;
+ break;
+ case OperatorType::kDiv:
+ res /= v;
+ break;
+ case OperatorType::kNegative:
+ return absl_ports::InternalError("Should never reach here.");
+ }
+ if (!std::isfinite(res)) {
+ return absl_ports::InvalidArgumentError(
+ "Got a non-finite value while evaluating operator score expression.");
+ }
+ }
+ return res;
+}
+
+const std::unordered_map<std::string, MathFunctionScoreExpression::FunctionType>
+ MathFunctionScoreExpression::kFunctionNames = {
+ {"log", FunctionType::kLog}, {"pow", FunctionType::kPow},
+ {"max", FunctionType::kMax}, {"min", FunctionType::kMin},
+ {"len", FunctionType::kLen}, {"sum", FunctionType::kSum},
+ {"avg", FunctionType::kAvg}, {"sqrt", FunctionType::kSqrt},
+ {"abs", FunctionType::kAbs}, {"sin", FunctionType::kSin},
+ {"cos", FunctionType::kCos}, {"tan", FunctionType::kTan}};
+
+const std::unordered_set<MathFunctionScoreExpression::FunctionType>
+ MathFunctionScoreExpression::kVariableArgumentsFunctions = {
+ FunctionType::kMax, FunctionType::kMin, FunctionType::kLen,
+ FunctionType::kSum, FunctionType::kAvg};
+
+libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
+MathFunctionScoreExpression::Create(
+ FunctionType function_type,
+ std::vector<std::unique_ptr<ScoreExpression>> args) {
+ if (args.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Math functions must have at least one argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ // Received a list type in the function argument.
+ if (args.size() == 1 && args[0]->type() == ScoreExpressionType::kDoubleList) {
+ // Only certain functions support list type.
+ if (kVariableArgumentsFunctions.count(function_type) > 0) {
+ return std::unique_ptr<MathFunctionScoreExpression>(
+ new MathFunctionScoreExpression(function_type, std::move(args)));
+ }
+    return absl_ports::InvalidArgumentError(
+        "Received an unsupported list type argument in the math function.");
+ }
+
+ bool args_all_constant_double = true;
+ for (const auto& child : args) {
+ if (child->type() != ScoreExpressionType::kDouble) {
+ return absl_ports::InvalidArgumentError(
+ "Got an invalid type for the math function. Should expect a double "
+ "type argument.");
+ }
+ if (!child->is_constant_double()) {
+ args_all_constant_double = false;
+ }
+ }
+ switch (function_type) {
+ case FunctionType::kLog:
+ if (args.size() != 1 && args.size() != 2) {
+ return absl_ports::InvalidArgumentError(
+ "log must have 1 or 2 arguments.");
+ }
+ break;
+ case FunctionType::kPow:
+ if (args.size() != 2) {
+ return absl_ports::InvalidArgumentError("pow must have 2 arguments.");
+ }
+ break;
+ case FunctionType::kSqrt:
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError("sqrt must have 1 argument.");
+ }
+ break;
+ case FunctionType::kAbs:
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError("abs must have 1 argument.");
+ }
+ break;
+ case FunctionType::kSin:
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError("sin must have 1 argument.");
+ }
+ break;
+ case FunctionType::kCos:
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError("cos must have 1 argument.");
+ }
+ break;
+ case FunctionType::kTan:
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError("tan must have 1 argument.");
+ }
+ break;
+ // Functions that support variable length arguments
+ case FunctionType::kMax:
+ [[fallthrough]];
+ case FunctionType::kMin:
+ [[fallthrough]];
+ case FunctionType::kLen:
+ [[fallthrough]];
+ case FunctionType::kSum:
+ [[fallthrough]];
+ case FunctionType::kAvg:
+ break;
+ }
+ std::unique_ptr<ScoreExpression> expression =
+ std::unique_ptr<MathFunctionScoreExpression>(
+ new MathFunctionScoreExpression(function_type, std::move(args)));
+ if (args_all_constant_double) {
+    // Because all of the arguments are constants, this expression does not
+    // depend on the DocHitInfo or query_it that are passed into it.
+ return ConstantScoreExpression::Create(
+ expression->eval(DocHitInfo(), /*query_it=*/nullptr));
+ }
+ return expression;
+}
+
+libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ std::vector<double> values;
+ if (args_.at(0)->type() == ScoreExpressionType::kDoubleList) {
+ ICING_ASSIGN_OR_RETURN(values, args_.at(0)->eval_list(hit_info, query_it));
+ } else {
+ for (const auto& child : args_) {
+ ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it));
+ values.push_back(v);
+ }
+ }
+
+ double res = 0;
+ switch (function_type_) {
+ case FunctionType::kLog:
+ if (values.size() == 1) {
+ res = log(values[0]);
+ } else {
+ // argument 0 is log base
+ // argument 1 is the value
+ res = log(values[1]) / log(values[0]);
+ }
+ break;
+ case FunctionType::kPow:
+ res = pow(values[0], values[1]);
+ break;
+ case FunctionType::kMax:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in max function");
+ }
+ res = *std::max_element(values.begin(), values.end());
+ break;
+ case FunctionType::kMin:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in min function");
+ }
+ res = *std::min_element(values.begin(), values.end());
+ break;
+ case FunctionType::kLen:
+ res = values.size();
+ break;
+ case FunctionType::kSum:
+ res = std::reduce(values.begin(), values.end());
+ break;
+ case FunctionType::kAvg:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in avg function.");
+ }
+ res = std::reduce(values.begin(), values.end()) / values.size();
+ break;
+ case FunctionType::kSqrt:
+ res = sqrt(values[0]);
+ break;
+ case FunctionType::kAbs:
+      res = std::abs(values[0]);
+ break;
+ case FunctionType::kSin:
+ res = sin(values[0]);
+ break;
+ case FunctionType::kCos:
+ res = cos(values[0]);
+ break;
+ case FunctionType::kTan:
+ res = tan(values[0]);
+ break;
+ }
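+  // Domain errors (e.g. sqrt(-1) -> NaN, log(0) -> -inf) are caught here so
+  // that the scorer can fall back to the default score.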
+ if (!std::isfinite(res)) {
+ return absl_ports::InvalidArgumentError(
+ "Got a non-finite value while evaluating math function score "
+ "expression.");
+ }
+ return res;
+}
+
+const std::unordered_map<std::string,
+ DocumentFunctionScoreExpression::FunctionType>
+ DocumentFunctionScoreExpression::kFunctionNames = {
+ {"documentScore", FunctionType::kDocumentScore},
+ {"creationTimestamp", FunctionType::kCreationTimestamp},
+ {"usageCount", FunctionType::kUsageCount},
+ {"usageLastUsedTimestamp", FunctionType::kUsageLastUsedTimestamp}};
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocumentFunctionScoreExpression>>
+DocumentFunctionScoreExpression::Create(
+ FunctionType function_type,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, double default_score,
+ int64_t current_time_ms) {
+ if (args.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Document-based functions must have at least one argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "The first parameter of document-based functions must be \"this\".");
+ }
+ switch (function_type) {
+ case FunctionType::kDocumentScore:
+ [[fallthrough]];
+ case FunctionType::kCreationTimestamp:
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "DocumentScore/CreationTimestamp must have 1 argument.");
+ }
+ break;
+ case FunctionType::kUsageCount:
+ [[fallthrough]];
+ case FunctionType::kUsageLastUsedTimestamp:
+ if (args.size() != 2 || args[1]->type() != ScoreExpressionType::kDouble) {
+ return absl_ports::InvalidArgumentError(
+ "UsageCount/UsageLastUsedTimestamp must have 2 arguments. The "
+ "first argument should be \"this\", and the second argument "
+ "should be the usage type.");
+ }
+ break;
+ }
+ return std::unique_ptr<DocumentFunctionScoreExpression>(
+ new DocumentFunctionScoreExpression(function_type, std::move(args),
+ document_store, default_score,
+ current_time_ms));
+}
+
+libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ switch (function_type_) {
+ case FunctionType::kDocumentScore:
+ [[fallthrough]];
+ case FunctionType::kCreationTimestamp: {
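+      // Passing default_score_ as the extra argument makes
+      // ICING_ASSIGN_OR_RETURN return default_score_, rather than the error,
+      // if the score data lookup fails.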
+ ICING_ASSIGN_OR_RETURN(DocumentAssociatedScoreData score_data,
+ document_store_.GetDocumentAssociatedScoreData(
+ hit_info.document_id()),
+ default_score_);
+ if (function_type_ == FunctionType::kDocumentScore) {
+ return static_cast<double>(score_data.document_score());
+ }
+ return static_cast<double>(score_data.creation_timestamp_ms());
+ }
+ case FunctionType::kUsageCount:
+ [[fallthrough]];
+ case FunctionType::kUsageLastUsedTimestamp: {
+ ICING_ASSIGN_OR_RETURN(double raw_usage_type,
+ args_[1]->eval(hit_info, query_it));
+      int usage_type = static_cast<int>(raw_usage_type);
+      if (usage_type < 1 || usage_type > 3 || raw_usage_type != usage_type) {
+        return absl_ports::InvalidArgumentError(
+            "Usage type must be an integer from 1 to 3.");
+      }
+ std::optional<UsageStore::UsageScores> usage_scores =
+ document_store_.GetUsageScores(hit_info.document_id(),
+ current_time_ms_);
+ if (!usage_scores) {
+ // If there's no UsageScores entry present for this doc, then just
+ // treat it as a default instance.
+ usage_scores = UsageStore::UsageScores();
+ }
+ if (function_type_ == FunctionType::kUsageCount) {
+ if (usage_type == 1) {
+ return usage_scores->usage_type1_count;
+ } else if (usage_type == 2) {
+ return usage_scores->usage_type2_count;
+ } else {
+ return usage_scores->usage_type3_count;
+ }
+ }
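+      // Last-used timestamps are stored in seconds; convert to milliseconds.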
+ if (usage_type == 1) {
+ return usage_scores->usage_type1_last_used_timestamp_s * 1000.0;
+ } else if (usage_type == 2) {
+ return usage_scores->usage_type2_last_used_timestamp_s * 1000.0;
+ } else {
+ return usage_scores->usage_type3_last_used_timestamp_s * 1000.0;
+ }
+ }
+ }
+}
+
+libtextclassifier3::StatusOr<
+ std::unique_ptr<RelevanceScoreFunctionScoreExpression>>
+RelevanceScoreFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ Bm25fCalculator* bm25f_calculator, double default_score) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "relevanceScore must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "relevanceScore must take \"this\" as its argument.");
+ }
+ return std::unique_ptr<RelevanceScoreFunctionScoreExpression>(
+ new RelevanceScoreFunctionScoreExpression(bm25f_calculator,
+ default_score));
+}
+
+libtextclassifier3::StatusOr<double>
+RelevanceScoreFunctionScoreExpression::eval(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (query_it == nullptr) {
+ return default_score_;
+ }
+ return static_cast<double>(
+ bm25f_calculator_.ComputeScore(query_it, hit_info, default_score_));
+}
+
+libtextclassifier3::StatusOr<
+ std::unique_ptr<ChildrenRankingSignalsFunctionScoreExpression>>
+ChildrenRankingSignalsFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const JoinChildrenFetcher* join_children_fetcher) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "childrenRankingSignals must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "childrenRankingSignals must take \"this\" as its argument.");
+ }
+ if (join_children_fetcher == nullptr) {
+ return absl_ports::InvalidArgumentError(
+ "childrenRankingSignals must only be used with join, but "
+ "JoinChildrenFetcher "
+ "is not provided.");
+ }
+ return std::unique_ptr<ChildrenRankingSignalsFunctionScoreExpression>(
+ new ChildrenRankingSignalsFunctionScoreExpression(
+ *join_children_fetcher));
+}
+
+libtextclassifier3::StatusOr<std::vector<double>>
+ChildrenRankingSignalsFunctionScoreExpression::eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<ScoredDocumentHit> children_hits,
+ join_children_fetcher_.GetChildren(hit_info.document_id()));
+ std::vector<double> children_scores;
+ children_scores.reserve(children_hits.size());
+ for (const ScoredDocumentHit& child_hit : children_hits) {
+ children_scores.push_back(child_hit.score());
+ }
+ return std::move(children_scores);
+}
+
+libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyWeightsFunctionScoreExpression>>
+PropertyWeightsFunctionScoreExpression::Create(
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, const SectionWeights* section_weights,
+ int64_t current_time_ms) {
+ if (args.size() != 1) {
+ return absl_ports::InvalidArgumentError(
+ "propertyWeights must have 1 argument.");
+ }
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(args));
+
+ if (args[0]->type() != ScoreExpressionType::kDocument) {
+ return absl_ports::InvalidArgumentError(
+ "propertyWeights must take \"this\" as its argument.");
+ }
+ return std::unique_ptr<PropertyWeightsFunctionScoreExpression>(
+ new PropertyWeightsFunctionScoreExpression(
+ document_store, section_weights, current_time_ms));
+}
+
+libtextclassifier3::StatusOr<std::vector<double>>
+PropertyWeightsFunctionScoreExpression::eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator*) const {
+ std::vector<double> weights;
+ SectionIdMask sections = hit_info.hit_section_ids_mask();
+ SchemaTypeId schema_type_id = GetSchemaTypeId(hit_info.document_id());
+
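+  // Iterate over the set bits of the section mask: __builtin_ctzll yields the
+  // lowest matching section id, which is then cleared from the mask.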
+ while (sections != 0) {
+ SectionId section_id = __builtin_ctzll(sections);
+ sections &= ~(UINT64_C(1) << section_id);
+ weights.push_back(section_weights_.GetNormalizedSectionWeight(
+ schema_type_id, section_id));
+ }
+ return weights;
+}
+
+SchemaTypeId PropertyWeightsFunctionScoreExpression::GetSchemaTypeId(
+ DocumentId document_id) const {
+ auto filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(document_id, current_time_ms_);
+ if (!filter_data_optional) {
+ // This should never happen. The only failure case for
+ // GetAliveDocumentFilterData is if the document_id is outside of the range
+ // of allocated document_ids, which shouldn't be possible since we're
+ // getting this document_id from the posting lists.
+ ICING_LOG(WARNING) << "No document filter data for document ["
+ << document_id << "]";
+ return kInvalidSchemaTypeId;
+ }
+ return filter_data_optional.value().schema_type_id();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h
new file mode 100644
index 0000000..08d7997
--- /dev/null
+++ b/icing/scoring/advanced_scoring/score-expression.h
@@ -0,0 +1,348 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_ADVANCED_SCORING_SCORE_EXPRESSION_H_
+#define ICING_SCORING_ADVANCED_SCORING_SCORE_EXPRESSION_H_
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/scoring/bm25f-calculator.h"
+#include "icing/store/document-store.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+enum class ScoreExpressionType {
+ kDouble,
+ kDoubleList,
+  kDocument  // Only "this" evaluates to the document type.
+};
+
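+// Base class of the expression tree built from an advanced scoring
+// expression. Every node declares its static type via type(). The default
+// eval() and eval_list() implementations act as a safety net: a node must
+// only ever be evaluated as the type it declares.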
+class ScoreExpression {
+ public:
+ virtual ~ScoreExpression() = default;
+
+  // Evaluates the score expression to a double for the current document.
+ //
+ // RETURNS:
+ // - The evaluated result as a double on success.
+ // - INVALID_ARGUMENT if a non-finite value is reached while evaluating the
+ // expression.
+ // - INTERNAL if there are inconsistencies.
+ virtual libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (type() == ScoreExpressionType::kDouble) {
+ return absl_ports::UnimplementedError(
+ "All ScoreExpressions of type Double must provide their own "
+ "implementation of eval!");
+ }
+ return absl_ports::InternalError(
+ "Runtime type error: the expression should never be evaluated to a "
+ "double. There must be inconsistencies in the static type checking.");
+ }
+
+ virtual libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (type() == ScoreExpressionType::kDoubleList) {
+ return absl_ports::UnimplementedError(
+ "All ScoreExpressions of type Double List must provide their own "
+ "implementation of eval_list!");
+ }
+ return absl_ports::InternalError(
+ "Runtime type error: the expression should never be evaluated to a "
+ "double list. There must be inconsistencies in the static type "
+ "checking.");
+ }
+
+  // Indicates the type to which the current expression evaluates.
+ virtual ScoreExpressionType type() const = 0;
+
+  // Indicates whether the current expression is a constant double.
+ // Returns true if and only if the object is of ConstantScoreExpression type.
+ virtual bool is_constant_double() const { return false; }
+};
+
+class ThisExpression : public ScoreExpression {
+ public:
+ static std::unique_ptr<ThisExpression> Create() {
+ return std::unique_ptr<ThisExpression>(new ThisExpression());
+ }
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDocument;
+ }
+
+ private:
+ ThisExpression() = default;
+};
+
+class ConstantScoreExpression : public ScoreExpression {
+ public:
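+  // Accepts a StatusOr so that constant folding (see the Create factories of
+  // OperatorScoreExpression and MathFunctionScoreExpression) can store an
+  // evaluation error here and surface it when eval() is called.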
+ static std::unique_ptr<ConstantScoreExpression> Create(
+ libtextclassifier3::StatusOr<double> c) {
+ return std::unique_ptr<ConstantScoreExpression>(
+ new ConstantScoreExpression(c));
+ }
+
+ libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo&, const DocHitInfoIterator*) const override {
+ return c_;
+ }
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
+ bool is_constant_double() const override { return true; }
+
+ private:
+ explicit ConstantScoreExpression(libtextclassifier3::StatusOr<double> c)
+ : c_(c) {}
+
+ libtextclassifier3::StatusOr<double> c_;
+};
+
+class OperatorScoreExpression : public ScoreExpression {
+ public:
+ enum class OperatorType { kPlus, kMinus, kNegative, kTimes, kDiv };
+
+ // RETURNS:
+ // - An OperatorScoreExpression instance on success if not simplifiable.
+ // - A ConstantScoreExpression instance on success if simplifiable.
+ // - FAILED_PRECONDITION on any null pointer in children.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> Create(
+ OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children);
+
+ libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
+ private:
+ explicit OperatorScoreExpression(
+ OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children)
+ : op_(op), children_(std::move(children)) {}
+
+ OperatorType op_;
+ std::vector<std::unique_ptr<ScoreExpression>> children_;
+};
+
+class MathFunctionScoreExpression : public ScoreExpression {
+ public:
+ enum class FunctionType {
+ kLog,
+ kPow,
+ kMax,
+ kMin,
+ kLen,
+ kSum,
+ kAvg,
+ kSqrt,
+ kAbs,
+ kSin,
+ kCos,
+ kTan
+ };
+
+ static const std::unordered_map<std::string, FunctionType> kFunctionNames;
+
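+  // These functions accept either a variable number of double arguments, or a
+  // single double-list argument, e.g. max(this.childrenRankingSignals()).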
+ static const std::unordered_set<FunctionType> kVariableArgumentsFunctions;
+
+ // RETURNS:
+ // - A MathFunctionScoreExpression instance on success if not simplifiable.
+ // - A ConstantScoreExpression instance on success if simplifiable.
+ // - FAILED_PRECONDITION on any null pointer in args.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> Create(
+ FunctionType function_type,
+ std::vector<std::unique_ptr<ScoreExpression>> args);
+
+ libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
+ private:
+ explicit MathFunctionScoreExpression(
+ FunctionType function_type,
+ std::vector<std::unique_ptr<ScoreExpression>> args)
+ : function_type_(function_type), args_(std::move(args)) {}
+
+ FunctionType function_type_;
+ std::vector<std::unique_ptr<ScoreExpression>> args_;
+};
+
+class DocumentFunctionScoreExpression : public ScoreExpression {
+ public:
+ enum class FunctionType {
+ kDocumentScore,
+ kCreationTimestamp,
+ kUsageCount,
+ kUsageLastUsedTimestamp,
+ };
+
+ static const std::unordered_map<std::string, FunctionType> kFunctionNames;
+
+ // RETURNS:
+ // - A DocumentFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in args.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<DocumentFunctionScoreExpression>>
+ Create(FunctionType function_type,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, double default_score,
+ int64_t current_time_ms);
+
+ libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
+ private:
+ explicit DocumentFunctionScoreExpression(
+ FunctionType function_type,
+ std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store, double default_score,
+ int64_t current_time_ms)
+ : args_(std::move(args)),
+ document_store_(*document_store),
+ default_score_(default_score),
+ function_type_(function_type),
+ current_time_ms_(current_time_ms) {}
+
+ std::vector<std::unique_ptr<ScoreExpression>> args_;
+ const DocumentStore& document_store_;
+ double default_score_;
+ FunctionType function_type_;
+ int64_t current_time_ms_;
+};
+
+class RelevanceScoreFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "relevanceScore";
+
+ // RETURNS:
+ // - A RelevanceScoreFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in args.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<RelevanceScoreFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ Bm25fCalculator* bm25f_calculator, double default_score);
+
+ libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
+ private:
+ explicit RelevanceScoreFunctionScoreExpression(
+ Bm25fCalculator* bm25f_calculator, double default_score)
+ : bm25f_calculator_(*bm25f_calculator), default_score_(default_score) {}
+
+ Bm25fCalculator& bm25f_calculator_;
+ double default_score_;
+};
+
+class ChildrenRankingSignalsFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "childrenRankingSignals";
+
+ // RETURNS:
+ // - A ChildrenRankingSignalsFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in children.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<ChildrenRankingSignalsFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ const JoinChildrenFetcher* join_children_fetcher);
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ private:
+ explicit ChildrenRankingSignalsFunctionScoreExpression(
+ const JoinChildrenFetcher& join_children_fetcher)
+ : join_children_fetcher_(join_children_fetcher) {}
+ const JoinChildrenFetcher& join_children_fetcher_;
+};
+
+class PropertyWeightsFunctionScoreExpression : public ScoreExpression {
+ public:
+ static constexpr std::string_view kFunctionName = "propertyWeights";
+
+ // RETURNS:
+ // - A PropertyWeightsFunctionScoreExpression instance on success.
+ // - FAILED_PRECONDITION on any null pointer in children.
+ // - INVALID_ARGUMENT on type errors.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PropertyWeightsFunctionScoreExpression>>
+ Create(std::vector<std::unique_ptr<ScoreExpression>> args,
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights, int64_t current_time_ms);
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator*) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ SchemaTypeId GetSchemaTypeId(DocumentId document_id) const;
+
+ private:
+ explicit PropertyWeightsFunctionScoreExpression(
+ const DocumentStore* document_store,
+ const SectionWeights* section_weights, int64_t current_time_ms)
+ : document_store_(*document_store),
+ section_weights_(*section_weights),
+ current_time_ms_(current_time_ms) {}
+ const DocumentStore& document_store_;
+ const SectionWeights& section_weights_;
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_ADVANCED_SCORING_SCORE_EXPRESSION_H_
diff --git a/icing/scoring/advanced_scoring/score-expression_test.cc b/icing/scoring/advanced_scoring/score-expression_test.cc
new file mode 100644
index 0000000..588090d
--- /dev/null
+++ b/icing/scoring/advanced_scoring/score-expression_test.cc
@@ -0,0 +1,353 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/advanced_scoring/score-expression.h"
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+class NonConstantScoreExpression : public ScoreExpression {
+ public:
+ static std::unique_ptr<NonConstantScoreExpression> Create() {
+ return std::make_unique<NonConstantScoreExpression>();
+ }
+
+ libtextclassifier3::StatusOr<double> eval(
+ const DocHitInfo &, const DocHitInfoIterator *) const override {
+ return 0;
+ }
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
+ bool is_constant_double() const override { return false; }
+};
+
+class ListScoreExpression : public ScoreExpression {
+ public:
+ static std::unique_ptr<ListScoreExpression> Create(
+ const std::vector<double> &values) {
+ std::unique_ptr<ListScoreExpression> res =
+ std::make_unique<ListScoreExpression>();
+ res->values = values;
+ return res;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo &, const DocHitInfoIterator *) const override {
+ return values;
+ }
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ std::vector<double> values;
+};
+
+template <typename... Args>
+std::vector<std::unique_ptr<ScoreExpression>> MakeChildren(Args... args) {
+ std::vector<std::unique_ptr<ScoreExpression>> children;
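+  // C++17 fold expression over the comma operator: pushes each argument into
+  // the vector in order.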
+ (children.push_back(std::move(args)), ...);
+ return children;
+}
+
+TEST(ScoreExpressionTest, OperatorSimplification) {
+ // 1 + 1 = 2
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kPlus,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ConstantScoreExpression::Create(1))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(2)));
+
+ // 1 - 2 - 3 = -4
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kMinus,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ConstantScoreExpression::Create(2),
+ ConstantScoreExpression::Create(3))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(-4)));
+
+ // 1 * 2 * 3 * 4 = 24
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kTimes,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ConstantScoreExpression::Create(2),
+ ConstantScoreExpression::Create(3),
+ ConstantScoreExpression::Create(4))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(24)));
+
+ // 1 / 2 / 4 = 0.125
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kDiv,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ConstantScoreExpression::Create(2),
+ ConstantScoreExpression::Create(4))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0.125)));
+
+ // -(2) = -2
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kNegative,
+ MakeChildren(ConstantScoreExpression::Create(2))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(-2)));
+}
+
+TEST(ScoreExpressionTest, MathFunctionSimplification) {
+ // pow(2, 2) = 4
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kPow,
+ MakeChildren(ConstantScoreExpression::Create(2),
+ ConstantScoreExpression::Create(2))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(4)));
+
+ // abs(-2) = 2
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAbs,
+ MakeChildren(ConstantScoreExpression::Create(-2))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(2)));
+
+ // log(e) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLog,
+ MakeChildren(ConstantScoreExpression::Create(M_E))));
+ ASSERT_TRUE(expression->is_constant_double());
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+}
+
+TEST(ScoreExpressionTest, CannotSimplifyNonConstant) {
+ // 1 + non_constant = non_constant
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kPlus,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ NonConstantScoreExpression::Create())));
+ ASSERT_FALSE(expression->is_constant_double());
+
+ // non_constant * non_constant = non_constant
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kTimes,
+ MakeChildren(NonConstantScoreExpression::Create(),
+ NonConstantScoreExpression::Create())));
+ ASSERT_FALSE(expression->is_constant_double());
+
+ // -(non_constant) = non_constant
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kNegative,
+ MakeChildren(NonConstantScoreExpression::Create())));
+ ASSERT_FALSE(expression->is_constant_double());
+
+ // pow(non_constant, 2) = non_constant
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kPow,
+ MakeChildren(NonConstantScoreExpression::Create(),
+ ConstantScoreExpression::Create(2))));
+ ASSERT_FALSE(expression->is_constant_double());
+
+ // abs(non_constant) = non_constant
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAbs,
+ MakeChildren(NonConstantScoreExpression::Create())));
+ ASSERT_FALSE(expression->is_constant_double());
+
+ // log(non_constant) = non_constant
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLog,
+ MakeChildren(NonConstantScoreExpression::Create())));
+ ASSERT_FALSE(expression->is_constant_double());
+}
+
+TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgument) {
+ // max({1, 2, 3}) = 3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3)));
+
+ // min({1, 2, 3}) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+
+ // len({1, 2, 3}) = 3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3)));
+
+ // sum({1, 2, 3}) = 6
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(6)));
+
+ // avg({1, 2, 3}) = 2
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(2)));
+
+ // max({4}) = 4
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({4}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(4)));
+
+ // min({5}) = 5
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({5}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(5)));
+
+ // len({6}) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({6}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+
+ // sum({7}) = 7
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({7}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(7)));
+
+ // avg({7}) = 7
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({7}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(7)));
+
+ // len({}) = 0
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kLen,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0)));
+
+ // sum({}) = 0
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSum,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(0)));
+}
+
+TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgumentError) {
+ // max({}) = evaluation error, since max on empty list does not produce a
+ // valid result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // avg({}) = evaluation error, since avg on empty list does not produce a
+ // valid result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kAvg,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // max(1, {2}) = type error, since max must take either n > 0 parameters of
+ // type double, or a single parameter of type list.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ListScoreExpression::Create({2}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // min({1}, {2}) = type error, since min must take either n > 0 parameters of
+ // type double, or a single parameter of type list.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({1}),
+ ListScoreExpression::Create({2}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // sin({1}) = type error, since sin does not support list type parameters.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSin,
+ MakeChildren(ListScoreExpression::Create({1}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ScoreExpressionTest, ChildrenCannotBeNull) {
+ EXPECT_THAT(OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kPlus,
+ MakeChildren(ConstantScoreExpression::Create(1), nullptr)),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kPow,
+ MakeChildren(ConstantScoreExpression::Create(2), nullptr)),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.cc b/icing/scoring/advanced_scoring/scoring-visitor.cc
new file mode 100644
index 0000000..e2b24a2
--- /dev/null
+++ b/icing/scoring/advanced_scoring/scoring-visitor.cc
@@ -0,0 +1,191 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/advanced_scoring/scoring-visitor.h"
+
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+void ScoringVisitor::VisitFunctionName(const FunctionNameNode* node) {
+ pending_error_ = absl_ports::InternalError(
+ "FunctionNameNode should be handled in VisitFunction!");
+}
+
+void ScoringVisitor::VisitString(const StringNode* node) {
+ pending_error_ =
+ absl_ports::InvalidArgumentError("Scoring does not support String!");
+}
+
+void ScoringVisitor::VisitText(const TextNode* node) {
+ pending_error_ =
+ absl_ports::InternalError("TextNode should be handled in VisitMember!");
+}
+
+void ScoringVisitor::VisitMember(const MemberNode* node) {
+ bool is_member_function = node->function() != nullptr;
+ if (is_member_function) {
+ // If the member node represents a member function, it must have only one
+ // child for "this".
+ if (node->children().size() != 1 ||
+ node->children()[0]->value() != "this") {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ "Member functions can only be called via \"this\".");
+ return;
+ }
+ return VisitFunctionHelper(node->function(), is_member_function);
+ }
+ std::string value;
+ if (node->children().size() == 1) {
+    // If a member has only one child, then it represents an integer literal.
+ value = node->children()[0]->value();
+ } else if (node->children().size() == 2) {
+    // If a member has two children, then it can only represent a
+    // floating-point number, so we join them with "." to build the numeric
+    // literal.
+ value = absl_ports::StrCat(node->children()[0]->value(), ".",
+ node->children()[1]->value());
+ } else {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ "MemberNode must have 1 or 2 children.");
+ return;
+ }
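+  // For example (illustrative): a MemberNode with children ["3", "14"] joins
+  // to value = "3.14", which std::strtod parses into the double 3.14 below.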
+ char* end;
+ double number = std::strtod(value.c_str(), &end);
+ if (end != value.c_str() + value.length()) {
+ // While it would be doable to support property references in the scoring
+ // grammar, we currently don't have an efficient way to support such a
+ // lookup (we'd have to read each document). As such, it's simpler to just
+ // restrict the scoring language to not include properties.
+ pending_error_ = absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Expect a numeric literal, but got ", value));
+ return;
+ }
+ stack_.push_back(ConstantScoreExpression::Create(number));
+}
+
+void ScoringVisitor::VisitFunctionHelper(const FunctionNode* node,
+ bool is_member_function) {
+ std::vector<std::unique_ptr<ScoreExpression>> args;
+ if (is_member_function) {
+ args.push_back(ThisExpression::Create());
+ }
+ for (const auto& arg : node->args()) {
+ arg->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ args.push_back(pop_stack());
+ }
+ const std::string& function_name = node->function_name()->value();
+ libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> expression =
+ absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unknown function: ", function_name));
+
+ if (DocumentFunctionScoreExpression::kFunctionNames.find(function_name) !=
+ DocumentFunctionScoreExpression::kFunctionNames.end()) {
+ // Document-based function
+ expression = DocumentFunctionScoreExpression::Create(
+ DocumentFunctionScoreExpression::kFunctionNames.at(function_name),
+ std::move(args), &document_store_, default_score_, current_time_ms_);
+ } else if (function_name ==
+ RelevanceScoreFunctionScoreExpression::kFunctionName) {
+ // relevanceScore function
+ expression = RelevanceScoreFunctionScoreExpression::Create(
+ std::move(args), &bm25f_calculator_, default_score_);
+ } else if (function_name ==
+ ChildrenRankingSignalsFunctionScoreExpression::kFunctionName) {
+ // childrenRankingSignals function
+ expression = ChildrenRankingSignalsFunctionScoreExpression::Create(
+ std::move(args), join_children_fetcher_);
+ } else if (function_name ==
+ PropertyWeightsFunctionScoreExpression::kFunctionName) {
+ // propertyWeights function
+ expression = PropertyWeightsFunctionScoreExpression::Create(
+ std::move(args), &document_store_, &section_weights_, current_time_ms_);
+ } else if (MathFunctionScoreExpression::kFunctionNames.find(function_name) !=
+ MathFunctionScoreExpression::kFunctionNames.end()) {
+ // Math functions
+ expression = MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::kFunctionNames.at(function_name),
+ std::move(args));
+ }
+
+ if (!expression.ok()) {
+ pending_error_ = expression.status();
+ return;
+ }
+ stack_.push_back(std::move(expression).ValueOrDie());
+}
+
+void ScoringVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
+ if (node->operator_text() != "MINUS") {
+ pending_error_ = absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unknown unary operator: ", node->operator_text()));
+ return;
+ }
+ node->child()->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ std::vector<std::unique_ptr<ScoreExpression>> children;
+ children.push_back(pop_stack());
+
+ libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> expression =
+ OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kNegative,
+ std::move(children));
+ if (!expression.ok()) {
+ pending_error_ = expression.status();
+ return;
+ }
+ stack_.push_back(std::move(expression).ValueOrDie());
+}
+
+void ScoringVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
+ std::vector<std::unique_ptr<ScoreExpression>> children;
+ for (const auto& arg : node->children()) {
+ arg->Accept(this);
+ if (has_pending_error()) {
+ return;
+ }
+ children.push_back(pop_stack());
+ }
+
+ libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>> expression =
+ absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Unknown Nary operator: ", node->operator_text()));
+
+ if (node->operator_text() == "PLUS") {
+ expression = OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kPlus, std::move(children));
+ } else if (node->operator_text() == "MINUS") {
+ expression = OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kMinus, std::move(children));
+ } else if (node->operator_text() == "TIMES") {
+ expression = OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kTimes, std::move(children));
+ } else if (node->operator_text() == "DIV") {
+ expression = OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kDiv, std::move(children));
+ }
+ if (!expression.ok()) {
+ pending_error_ = expression.status();
+ return;
+ }
+ stack_.push_back(std::move(expression).ValueOrDie());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.h b/icing/scoring/advanced_scoring/scoring-visitor.h
new file mode 100644
index 0000000..cfee25b
--- /dev/null
+++ b/icing/scoring/advanced_scoring/scoring-visitor.h
@@ -0,0 +1,108 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_ADVANCED_SCORING_SCORING_VISITOR_H_
+#define ICING_SCORING_ADVANCED_SCORING_SCORING_VISITOR_H_
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
+#include "icing/scoring/advanced_scoring/score-expression.h"
+#include "icing/scoring/bm25f-calculator.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
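+// Usage sketch (illustrative; assumes `root` is the root node of an AST
+// produced by the advanced query parser):
+//
+//   ScoringVisitor visitor(default_score, &document_store, &schema_store,
+//                          &section_weights, &bm25f_calculator,
+//                          /*join_children_fetcher=*/nullptr, current_time_ms);
+//   root->Accept(&visitor);
+//   libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
+//       expression = std::move(visitor).Expression();
+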
+class ScoringVisitor : public AbstractSyntaxTreeVisitor {
+ public:
+ explicit ScoringVisitor(double default_score,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store,
+ SectionWeights* section_weights,
+ Bm25fCalculator* bm25f_calculator,
+ const JoinChildrenFetcher* join_children_fetcher,
+ int64_t current_time_ms)
+ : default_score_(default_score),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ section_weights_(*section_weights),
+ bm25f_calculator_(*bm25f_calculator),
+ join_children_fetcher_(join_children_fetcher),
+ current_time_ms_(current_time_ms) {}
+
+ void VisitFunctionName(const FunctionNameNode* node) override;
+ void VisitString(const StringNode* node) override;
+ void VisitText(const TextNode* node) override;
+ void VisitMember(const MemberNode* node) override;
+
+ void VisitFunction(const FunctionNode* node) override {
+ return VisitFunctionHelper(node, /*is_member_function=*/false);
+ }
+
+ void VisitUnaryOperator(const UnaryOperatorNode* node) override;
+ void VisitNaryOperator(const NaryOperatorNode* node) override;
+
+ // RETURNS:
+  //  - A ScoreExpression instance able to evaluate the expression on success.
+ // - INVALID_ARGUMENT if the AST does not conform to supported expressions,
+ // such as type errors.
+ // - INTERNAL if there are inconsistencies.
+ libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
+ Expression() && {
+ if (has_pending_error()) {
+ return pending_error_;
+ }
+ if (stack_.size() != 1) {
+      return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+          "Expected exactly one result from "
+          "ScoringVisitor, but got %zu. There must be an inconsistency.",
+          stack_.size()));
+ }
+ return std::move(stack_[0]);
+ }
+
+ private:
+ // Visit function node. If is_member_function is true, a ThisExpression will
+ // be added as the first function argument.
+ void VisitFunctionHelper(const FunctionNode* node, bool is_member_function);
+
+ bool has_pending_error() const { return !pending_error_.ok(); }
+
+ std::unique_ptr<ScoreExpression> pop_stack() {
+ std::unique_ptr<ScoreExpression> result = std::move(stack_.back());
+ stack_.pop_back();
+ return result;
+ }
+
+ double default_score_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+ SectionWeights& section_weights_;
+ Bm25fCalculator& bm25f_calculator_;
+ // A non-null join_children_fetcher_ indicates scoring in a join.
+ const JoinChildrenFetcher* join_children_fetcher_; // Does not own.
+
+ libtextclassifier3::Status pending_error_;
+ std::vector<std::unique_ptr<ScoreExpression>> stack_;
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_ADVANCED_SCORING_SCORING_VISITOR_H_
diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc
new file mode 100644
index 0000000..a80ef34
--- /dev/null
+++ b/icing/scoring/bm25f-calculator.cc
@@ -0,0 +1,248 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/bm25f-calculator.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/store/corpus-associated-scoring-data.h"
+#include "icing/store/corpus-id.h"
+#include "icing/store/document-associated-score-data.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Smoothing parameter, determines the relevance of higher term frequency
+// documents. The higher k1, the higher their relevance. 1.2 is the default
+// value in the BM25F literature and works well in most corpora.
+constexpr float k1_ = 1.2f;
+// Smoothing parameter, determines the weight of the document length on the
+// final score. The higher b, the higher the influence of the document length.
+// 0.7 is the default value in the BM25F literature and works well in most
+// corpora.
+constexpr float b_ = 0.7f;
+
+// TODO(b/158603900): add tests for Bm25fCalculator
+Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store,
+ SectionWeights* section_weights,
+ int64_t current_time_ms)
+ : document_store_(document_store),
+ section_weights_(*section_weights),
+ current_time_ms_(current_time_ms) {}
+
+// During initialization, Bm25fCalculator iterates through
+// hit-iterators for each query term to pre-compute n(q_i) for each corpus under
+// consideration.
+void Bm25fCalculator::PrepareToScore(
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators) {
+ Clear();
+ TermId term_id = 0;
+ for (auto& iter : *query_term_iterators) {
+ const std::string& term = iter.first;
+ if (term_id_map_.find(term) != term_id_map_.end()) {
+ continue;
+ }
+ term_id_map_[term] = ++term_id;
+ DocHitInfoIterator* term_it = iter.second.get();
+
+ while (term_it->Advance().ok()) {
+ auto status_or = document_store_->GetDocumentAssociatedScoreData(
+ term_it->doc_hit_info().document_id());
+ if (!status_or.ok()) {
+ ICING_LOG(ERROR) << "No document score data";
+ continue;
+ }
+ DocumentAssociatedScoreData data = status_or.ValueOrDie();
+ CorpusId corpus_id = data.corpus_id();
+ CorpusTermInfo corpus_term_info(corpus_id, term_id);
+ corpus_nqi_map_[corpus_term_info.value]++;
+ }
+ }
+}
+
+void Bm25fCalculator::Clear() {
+ term_id_map_.clear();
+ corpus_avgdl_map_.clear();
+ corpus_nqi_map_.clear();
+ corpus_idf_map_.clear();
+}
+
+// Computes BM25F relevance score for query terms matched in document D.
+//
+// BM25F = \sum_i IDF(q_i) * tf(q_i, D)
+//
+// where IDF(q_i) is the Inverse Document Frequency (IDF) weight of the query
+// term q_i in the corpus with document D, and tf(q_i, D) is the weighted and
+// normalized term frequency of query term q_i in the document D.
+float Bm25fCalculator::ComputeScore(const DocHitInfoIterator* query_it,
+ const DocHitInfo& hit_info,
+ double default_score) {
+ auto status_or =
+ document_store_->GetDocumentAssociatedScoreData(hit_info.document_id());
+ if (!status_or.ok()) {
+ ICING_LOG(ERROR) << "No document score data";
+ return default_score;
+ }
+ DocumentAssociatedScoreData data = status_or.ValueOrDie();
+ std::vector<TermMatchInfo> matched_terms_stats;
+ query_it->PopulateMatchedTermsStats(&matched_terms_stats);
+
+ float score = 0;
+ for (const TermMatchInfo& term_match_info : matched_terms_stats) {
+ float idf_weight =
+ GetCorpusIdfWeightForTerm(term_match_info.term, data.corpus_id());
+ float normalized_tf =
+ ComputedNormalizedTermFrequency(term_match_info, hit_info, data);
+ score += idf_weight * normalized_tf;
+ }
+
+ ICING_VLOG(1) << "BM25F: corpus_id:" << data.corpus_id()
+ << " docid:" << hit_info.document_id() << " score:" << score;
+ return score;
+}
+
+// Compute inverse document frequency (IDF) weight for query term in the given
+// corpus, and cache it in the map.
+//
+// N - n(q_i) + 0.5
+// IDF(q_i) = ln(1 + ------------------)
+// n(q_i) + 0.5
+//
+// where N is the number of documents in the corpus, and n(q_i) is the number
+// of documents in the corpus containing the query term q_i.
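+//
+// For example (illustrative numbers): with N = 100 documents and n(q_i) = 9,
+// IDF(q_i) = ln(1 + (100 - 9 + 0.5) / (9 + 0.5)) = ln(1 + 91.5 / 9.5) ~ 2.36.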
+float Bm25fCalculator::GetCorpusIdfWeightForTerm(std::string_view term,
+ CorpusId corpus_id) {
+ TermId term_id = term_id_map_[term];
+
+ CorpusTermInfo corpus_term_info(corpus_id, term_id);
+ auto iter = corpus_idf_map_.find(corpus_term_info.value);
+ if (iter != corpus_idf_map_.end()) {
+ return iter->second;
+ }
+
+ // First, figure out corpus scoring data.
+ auto status_or = document_store_->GetCorpusAssociatedScoreData(corpus_id);
+ if (!status_or.ok()) {
+ ICING_LOG(ERROR) << "No scoring data for corpus [" << corpus_id << "]";
+ return 0;
+ }
+ CorpusAssociatedScoreData csdata = status_or.ValueOrDie();
+
+ uint32_t num_docs = csdata.num_docs();
+ uint32_t nqi = corpus_nqi_map_[corpus_term_info.value];
+ float idf =
+ nqi != 0 ? log(1.0f + (num_docs - nqi + 0.5f) / (nqi + 0.5f)) : 0.0f;
+ corpus_idf_map_.insert({corpus_term_info.value, idf});
+ ICING_VLOG(1) << "corpus_id:" << corpus_id << " term:" << term
+                << " N:" << num_docs << " nqi:" << nqi << " idf:" << idf;
+ return idf;
+}
+
+// Get per corpus average document length and cache the result in the map.
+// The average doc length is calculated as:
+//
+// total_tokens_in_corpus
+// Avg Doc Length = -------------------------
+// num_docs_in_corpus + 1
+float Bm25fCalculator::GetCorpusAvgDocLength(CorpusId corpus_id) {
+ auto iter = corpus_avgdl_map_.find(corpus_id);
+ if (iter != corpus_avgdl_map_.end()) {
+ return iter->second;
+ }
+
+ // First, figure out corpus scoring data.
+ auto status_or = document_store_->GetCorpusAssociatedScoreData(corpus_id);
+ if (!status_or.ok()) {
+ ICING_LOG(ERROR) << "No scoring data for corpus [" << corpus_id << "]";
+ return 0;
+ }
+ CorpusAssociatedScoreData csdata = status_or.ValueOrDie();
+
+ corpus_avgdl_map_[corpus_id] = csdata.average_doc_length_in_tokens();
+ return csdata.average_doc_length_in_tokens();
+}
+
+// Computes normalized term frequency for query term q_i in document D.
+//
+// f(q_i, D) * (k1 + 1)
+// Normalized TF = --------------------------------------------
+// f(q_i, D) + k1 * (1 - b + b * |D| / avgdl)
+//
+// where f(q_i, D) is the frequency of query term q_i in document D,
+// |D| is the #tokens in D, avgdl is the average document length in the corpus,
+// k1 and b are smoothing parameters.
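+//
+// For example (illustrative numbers): with f(q_i, D) = 3, k1 = 1.2, b = 0.7,
+// and |D| = avgdl, the denominator reduces to f + k1 = 4.2, so
+// Normalized TF = (3 * 2.2) / 4.2 ~ 1.57.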
+float Bm25fCalculator::ComputedNormalizedTermFrequency(
+ const TermMatchInfo& term_match_info, const DocHitInfo& hit_info,
+ const DocumentAssociatedScoreData& data) {
+ uint32_t dl = data.length_in_tokens();
+ float avgdl = GetCorpusAvgDocLength(data.corpus_id());
+ float f_q = ComputeTermFrequencyForMatchedSections(
+ data.corpus_id(), term_match_info, hit_info.document_id());
+ float normalized_tf =
+ f_q * (k1_ + 1) / (f_q + k1_ * (1 - b_ + b_ * dl / avgdl));
+
+ ICING_VLOG(1) << "corpus_id:" << data.corpus_id()
+ << " docid:" << hit_info.document_id() << " dl:" << dl
+ << " avgdl:" << avgdl << " f_q:" << f_q
+ << " norm_tf:" << normalized_tf;
+ return normalized_tf;
+}
+
+float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
+ CorpusId corpus_id, const TermMatchInfo& term_match_info,
+ DocumentId document_id) const {
+ float sum = 0.0f;
+ SectionIdMask sections = term_match_info.section_ids_mask;
+ SchemaTypeId schema_type_id = GetSchemaTypeId(document_id);
+
+ while (sections != 0) {
+ SectionId section_id = __builtin_ctzll(sections);
+ sections &= ~(UINT64_C(1) << section_id);
+
+    Hit::TermFrequency tf = term_match_info.term_frequencies[section_id];
+    if (tf != Hit::kNoTermFrequency) {
+      // Scale the term frequency by the normalized weight of the section in
+      // which the hit occurred.
+      double weighted_tf = tf * section_weights_.GetNormalizedSectionWeight(
+                                    schema_type_id, section_id);
+      sum += weighted_tf;
+    }
+ }
+ return sum;
+}
+
+SchemaTypeId Bm25fCalculator::GetSchemaTypeId(DocumentId document_id) const {
+ auto filter_data_optional = document_store_->GetAliveDocumentFilterData(
+ document_id, current_time_ms_);
+ if (!filter_data_optional) {
+ // This should never happen. The only failure case for
+ // GetAliveDocumentFilterData is if the document_id is outside of the range
+ // of allocated document_ids, which shouldn't be possible since we're
+ // getting this document_id from the posting lists.
+ ICING_LOG(WARNING) << "No document filter data for document ["
+ << document_id << "]";
+ return kInvalidSchemaTypeId;
+ }
+ return filter_data_optional.value().schema_type_id();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/bm25f-calculator.h b/icing/scoring/bm25f-calculator.h
new file mode 100644
index 0000000..36f9c68
--- /dev/null
+++ b/icing/scoring/bm25f-calculator.h
@@ -0,0 +1,177 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_BM25F_CALCULATOR_H_
+#define ICING_SCORING_BM25F_CALCULATOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/scoring/section-weights.h"
+#include "icing/store/corpus-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Bm25fCalculator encapsulates the logic to compute BM25F term-weight based
+// ranking function.
+//
+// The formula to compute BM25F is as follows:
+//
+// BM25F = \sum_i IDF(q_i) * tf(q_i, D)
+//
+// where IDF(q_i) is the Inverse Document Frequency (IDF) weight of the query
+// term q_i in the corpus with document D, and tf(q_i, D) is the weighted and
+// normalized term frequency of query term q_i in the document D.
+//
+// IDF(q_i) is computed as follows:
+//
+// N - n(q_i) + 0.5
+// IDF(q_i) = log(1 + ------------------)
+// n(q_i) + 0.5
+//
+// where N is the number of documents in the corpus, and n(q_i) is the number
+// of documents in the corpus containing the query term q_i.
+//
+// Lastly, tf(q_i, D) is computed as follows:
+//
+// f(q_i, D) * (k1 + 1)
+// Normalized TF = --------------------------------------------
+// f(q_i, D) + k1 * (1 - b + b * |D| / avgdl)
+//
+// where f(q_i, D) is the frequency of query term q_i in document D,
+// |D| is the #tokens in D, avgdl is the average document length in the corpus,
+// k1 and b are smoothing parameters.
+//
+// see: go/icing-bm25f
+// see: glossary/bm25
+class Bm25fCalculator {
+ public:
+ explicit Bm25fCalculator(const DocumentStore *document_store,
+ SectionWeights *section_weights,
+ int64_t current_time_ms);
+
+ // Precompute and cache statistics relevant to BM25F.
+ // Populates term_id_map_ and corpus_nqi_map_ for use while scoring other
+ // results.
+ // The query_term_iterators map is used to build the
+  // std::unordered_map<std::string_view, TermId> term_id_map_. It must
+  // outlive the Bm25fCalculator; otherwise the string_view keys in
+  // term_id_map_, used later to compute document scores, would dangle.
+ void PrepareToScore(
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ *query_term_iterators);
+
+ // Compute the BM25F relevance score for the given hit, represented by
+ // DocHitInfo.
+ // The default score will be returned only when the scorer fails to find or
+ // calculate a score for the document.
+ float ComputeScore(const DocHitInfoIterator *query_it,
+ const DocHitInfo &hit_info, double default_score);
+
+ private:
+ // Compact ID for each query term.
+ using TermId = uint16_t;
+
+ // Compact representation of <CorpusId, TermId> for use as a key in a
+ // hash_map.
+ struct CorpusTermInfo {
+ // Layout bits: 16 bit CorpusId + 16 bit TermId
+ using Value = uint32_t;
+
+ Value value;
+
+    // Bit width of each packed field. Note that sizeof() yields bytes, so
+    // multiply by 8 to get the 16/16-bit layout described above.
+    static constexpr int kTermIdBits = sizeof(TermId) * 8;                 // 16
+    static constexpr int kCorpusIdBits = sizeof(Value) * 8 - kTermIdBits;  // 16
+
+ explicit CorpusTermInfo(CorpusId corpus_id, TermId term_id) : value(0) {
+ BITFIELD_OR(value, kTermIdBits, kCorpusIdBits,
+ static_cast<uint64_t>(corpus_id));
+ BITFIELD_OR(value, 0, kTermIdBits, term_id);
+ }
+
+ bool operator==(const CorpusTermInfo &other) const {
+ return value == other.value;
+ }
+ };
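+
+  // For example (illustrative, using the 16/16-bit layout above): corpus_id=3
+  // and term_id=7 pack into value 0x00030007.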
+
+ // Returns idf weight for the term and provided corpus.
+ float GetCorpusIdfWeightForTerm(std::string_view term, CorpusId corpus_id);
+
+ // Returns the average document length for the corpus. The average is
+ // calculated as the sum of tokens in the corpus' documents over the total
+ // number of documents plus one.
+ float GetCorpusAvgDocLength(CorpusId corpus_id);
+
+ // Returns the normalized term frequency for the term match and document hit.
+ // This normalizes the term frequency by applying smoothing parameters and
+ // factoring document length.
+ float ComputedNormalizedTermFrequency(
+ const TermMatchInfo &term_match_info, const DocHitInfo &hit_info,
+ const DocumentAssociatedScoreData &data);
+
+ // Returns the weighted term frequency for the term match and document. For
+ // each section the term is present, we scale the term frequency by its
+ // section weight. We return the sum of the weighted term frequencies over all
+ // sections.
+ float ComputeTermFrequencyForMatchedSections(
+ CorpusId corpus_id, const TermMatchInfo &term_match_info,
+ DocumentId document_id) const;
+
+ // Returns the schema type id for the document by retrieving it from the
+ // DocumentFilterData.
+ SchemaTypeId GetSchemaTypeId(DocumentId document_id) const;
+
+ // Clears cached scoring data and prepares the calculator for a new scoring
+ // run.
+ void Clear();
+
+ const DocumentStore *document_store_; // Does not own.
+
+ // Used for accessing normalized section weights when computing the weighted
+ // term frequency.
+ SectionWeights &section_weights_;
+
+ // Map from query term to compact term ID.
+ // Necessary as a key to the other maps.
+ // The use of the string_view as key here means that the query_term_iterators
+  // map must outlive the Bm25fCalculator.
+ std::unordered_map<std::string_view, TermId> term_id_map_;
+
+ // Map from corpus ID to average document length (avgdl).
+ // Necessary to calculate the normalized term frequency.
+  // This information is cached in the DocumentStore::CorpusScoreCache.
+ std::unordered_map<CorpusId, float> corpus_avgdl_map_;
+ // Map from <corpus ID, term ID> to number of documents containing term q_i,
+ // called n(q_i).
+ // Necessary to calculate IDF(q_i) (inverse document frequency).
+ // This information must be calculated by iterating through the hits for these
+ // terms.
+ std::unordered_map<CorpusTermInfo::Value, uint32_t> corpus_nqi_map_;
+
+ // Map from <corpus ID, term ID> to IDF(q_i) (inverse document frequency).
+ std::unordered_map<CorpusTermInfo::Value, float> corpus_idf_map_;
+
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_BM25F_CALCULATOR_H_
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.h b/icing/scoring/priority-queue-scored-document-hits-ranker.h
new file mode 100644
index 0000000..0798d7d
--- /dev/null
+++ b/icing/scoring/priority-queue-scored-document-hits-ranker.h
@@ -0,0 +1,128 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
+#define ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
+
+#include <queue>
+#include <utility>
+#include <vector>
+
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scored-document-hits-ranker.h"
+
+namespace icing {
+namespace lib {
+
+// ScoredDocumentHitsRanker interface implementation, based on
+// std::priority_queue. We can get the next top hit in O(lg N) time.
+template <typename ScoredDataType,
+ typename Converter = typename ScoredDataType::Converter>
+class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
+ public:
+ explicit PriorityQueueScoredDocumentHitsRanker(
+ std::vector<ScoredDataType>&& scored_data_vec, bool is_descending = true);
+
+ ~PriorityQueueScoredDocumentHitsRanker() override = default;
+
+  // Note: the ranker may store ScoredDocumentHit or JoinedScoredDocumentHit,
+  // so scored_data_pq_ is templated on the stored data type.
+  // - JoinedScoredDocumentHit is a superset of ScoredDocumentHit, so PopNext
+  //   is unified on the superset type JoinedScoredDocumentHit, and rankers
+  //   storing ScoredDocumentHit convert before returning. This keeps callers
+  //   simple, especially ResultRetriever, which only needs to deal with a
+  //   single return format.
+  // - JoinedScoredDocumentHit is ~2x the size of ScoredDocumentHit. Since the
+  //   ranker (which contains a priority queue of data) is cached in
+  //   ResultState, storing the scored hits directly in JoinedScoredDocumentHit
+  //   format would double the memory usage. Therefore, we keep the flexibility
+  //   to store ScoredDocumentHit or any other type of data, but require
+  //   PopNext to convert it to JoinedScoredDocumentHit.
+ JoinedScoredDocumentHit PopNext() override;
+
+ void TruncateHitsTo(int new_size) override;
+
+ int size() const override { return scored_data_pq_.size(); }
+
+ bool empty() const override { return scored_data_pq_.empty(); }
+
+ private:
+  // Comparator for std::priority_queue. Since std::priority_queue is a max
+  // heap (descending order), reverse the comparison if ascending order is
+  // wanted.
+ class Comparator {
+ public:
+ explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {}
+
+ bool operator()(const ScoredDataType& lhs,
+ const ScoredDataType& rhs) const {
+      // STL comparator requirement: comparing two equal elements MUST return
+      // false. If this were written as `return is_ascending_ == !(lhs < rhs)`:
+      // - When lhs == rhs, !(lhs < rhs) is true.
+      // - So if is_ascending_ is true, we would return true for equal
+      //   elements, violating the requirement.
+ if (is_ascending_) {
+ return rhs < lhs;
+ }
+ return lhs < rhs;
+ }
+
+ private:
+ bool is_ascending_;
+ };
+
+ Comparator comparator_;
+
+  // Use a priority queue to get the top K hits in O(K lg N) time.
+ std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
+ scored_data_pq_;
+
+ Converter converter_;
+};
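+
+// Usage sketch (illustrative):
+//
+//   std::vector<ScoredDocumentHit> hits = /* scored hits */;
+//   PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+//       std::move(hits), /*is_descending=*/true);
+//   while (!ranker.empty()) {
+//     JoinedScoredDocumentHit next = ranker.PopNext();
+//     // Consume `next` in rank order.
+//   }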
+
+template <typename ScoredDataType, typename Converter>
+PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::
+ PriorityQueueScoredDocumentHitsRanker(
+ std::vector<ScoredDataType>&& scored_data_vec, bool is_descending)
+ : comparator_(/*is_ascending=*/!is_descending),
+ scored_data_pq_(comparator_, std::move(scored_data_vec)) {}
+
+template <typename ScoredDataType, typename Converter>
+JoinedScoredDocumentHit
+PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::PopNext() {
+ ScoredDataType next_scored_data = scored_data_pq_.top();
+ scored_data_pq_.pop();
+ return converter_(std::move(next_scored_data));
+}
+
+template <typename ScoredDataType, typename Converter>
+void PriorityQueueScoredDocumentHitsRanker<
+ ScoredDataType, Converter>::TruncateHitsTo(int new_size) {
+  if (new_size < 0 ||
+      scored_data_pq_.size() <= static_cast<size_t>(new_size)) {
+ return;
+ }
+
+ // Copying the best new_size results.
+ std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
+ new_pq(comparator_);
+ for (int i = 0; i < new_size; ++i) {
+ new_pq.push(scored_data_pq_.top());
+ scored_data_pq_.pop();
+ }
+ scored_data_pq_ = std::move(new_pq);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc b/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc
new file mode 100644
index 0000000..ace2350
--- /dev/null
+++ b/icing/scoring/priority-queue-scored-document-hits-ranker_test.cc
@@ -0,0 +1,255 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+class Converter {
+ public:
+ JoinedScoredDocumentHit operator()(ScoredDocumentHit hit) const {
+ return converter_(std::move(hit));
+ }
+
+ private:
+ ScoredDocumentHit::Converter converter_;
+} converter;
+
+std::vector<JoinedScoredDocumentHit> PopAll(
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>& ranker) {
+ std::vector<JoinedScoredDocumentHit> hits;
+ while (!ranker.empty()) {
+ hits.push_back(ranker.PopNext());
+ }
+ return hits;
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldGetCorrectSizeAndEmpty) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2},
+ /*is_descending=*/true);
+ EXPECT_THAT(ranker.size(), Eq(3));
+ EXPECT_FALSE(ranker.empty());
+
+ ranker.PopNext();
+ EXPECT_THAT(ranker.size(), Eq(2));
+ EXPECT_FALSE(ranker.empty());
+
+ ranker.PopNext();
+ EXPECT_THAT(ranker.size(), Eq(1));
+ EXPECT_FALSE(ranker.empty());
+
+ ranker.PopNext();
+ EXPECT_THAT(ranker.size(), Eq(0));
+ EXPECT_TRUE(ranker.empty());
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldRankInDescendingOrder) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+
+ EXPECT_THAT(ranker, SizeIs(5));
+ std::vector<JoinedScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(
+ scored_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(converter(scored_hit_4)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_3)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_1)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_0))));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldRankInAscendingOrder) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/false);
+
+ EXPECT_THAT(ranker, SizeIs(5));
+ std::vector<JoinedScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(
+ scored_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(converter(scored_hit_0)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_1)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_3)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_4))));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest,
+ ShouldRankDuplicateScoredDocumentHits) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_2, scored_hit_4, scored_hit_1, scored_hit_0, scored_hit_2,
+ scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+
+ EXPECT_THAT(ranker, SizeIs(8));
+ std::vector<JoinedScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(
+ scored_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(converter(scored_hit_4)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_4)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_3)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_1)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_0))));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest,
+ ShouldRankEmptyScoredDocumentHits) {
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ /*scored_document_hits=*/{},
+ /*is_descending=*/true);
+ EXPECT_THAT(ranker, IsEmpty());
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldTruncateToNewSize) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+ ASSERT_THAT(ranker, SizeIs(5));
+
+ ranker.TruncateHitsTo(/*new_size=*/3);
+ EXPECT_THAT(ranker, SizeIs(3));
+ std::vector<JoinedScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(
+ scored_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(converter(scored_hit_4)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_3)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2))));
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldTruncateToZero) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+ ASSERT_THAT(ranker, SizeIs(5));
+
+ ranker.TruncateHitsTo(/*new_size=*/0);
+ EXPECT_THAT(ranker, IsEmpty());
+}
+
+TEST(PriorityQueueScoredDocumentHitsRankerTest, ShouldNotTruncateToNegative) {
+ ScoredDocumentHit scored_hit_0(/*document_id=*/0, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_1(/*document_id=*/1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_2(/*document_id=*/2, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_3(/*document_id=*/3, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_hit_4(/*document_id=*/4, kSectionIdMaskNone,
+ /*score=*/1);
+
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit> ranker(
+ {scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3},
+ /*is_descending=*/true);
+ ASSERT_THAT(ranker, SizeIs(Eq(5)));
+
+ ranker.TruncateHitsTo(/*new_size=*/-1);
+ EXPECT_THAT(ranker, SizeIs(Eq(5)));
+ // Contents are not affected.
+ std::vector<JoinedScoredDocumentHit> scored_document_hits = PopAll(ranker);
+ EXPECT_THAT(
+ scored_document_hits,
+ ElementsAre(EqualsJoinedScoredDocumentHit(converter(scored_hit_4)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_3)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_2)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_1)),
+ EqualsJoinedScoredDocumentHit(converter(scored_hit_0))));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/ranker.cc b/icing/scoring/ranker.cc
index fecee82..d59c98f 100644
--- a/icing/scoring/ranker.cc
+++ b/icing/scoring/ranker.cc
@@ -32,6 +32,7 @@ namespace {
// Helper function to wrap the heapify algorithm, it heapifies the target
// subtree node in place.
+// TODO(b/152934343): refactor the heapify function and make it into a class.
void Heapify(
std::vector<ScoredDocumentHit>* scored_document_hits,
int target_subtree_root_index,
@@ -71,32 +72,76 @@ void Heapify(
}
}
-// Helper function to extract the root from the heap. The heap structure will be
-// maintained.
-//
-// Returns:
-// The current root element on success
-// RESOURCE_EXHAUSTED_ERROR if heap is empty
-libtextclassifier3::StatusOr<ScoredDocumentHit> PopRoot(
- std::vector<ScoredDocumentHit>* scored_document_hits_heap,
- const ScoredDocumentHitComparator& scored_document_hit_comparator) {
- if (scored_document_hits_heap->empty()) {
- // An invalid ScoredDocumentHit
- return absl_ports::ResourceExhaustedError("Heap is empty");
+// Heapifies the given term vector from top to bottom. Call it after adding or
+// replacing an element at the front of the vector.
+void HeapifyTermDown(std::vector<TermMetadata>& scored_terms,
+ int target_subtree_root_index) {
+ int heap_size = scored_terms.size();
+ if (target_subtree_root_index >= heap_size) {
+ return;
+ }
+
+ // Initializes subtree root as the current minimum node.
+ int min = target_subtree_root_index;
+ // If we represent a heap in an array/vector, the indices of the left and
+ // right children can be calculated as follows.
+ const int left = target_subtree_root_index * 2 + 1;
+ const int right = target_subtree_root_index * 2 + 2;
+
+ // If left child is smaller than current minimum.
+ if (left < heap_size &&
+ scored_terms.at(left).score < scored_terms.at(min).score) {
+ min = left;
+ }
+
+ // If right child is smaller than current minimum.
+ if (right < heap_size &&
+ scored_terms.at(right).score < scored_terms.at(min).score) {
+ min = right;
+ }
+
+ // If the minimum is not the subtree root, swap and continue heapifying the
+ // lower-level subtree.
+ if (min != target_subtree_root_index) {
+ std::swap(scored_terms.at(min), scored_terms.at(target_subtree_root_index));
+ HeapifyTermDown(scored_terms, min);
+ }
+}
+
+// Heapifies the given term vector from bottom to top. Call it after appending
+// an element at the end of the vector.
+void HeapifyTermUp(std::vector<TermMetadata>& scored_terms,
+ int target_subtree_child_index) {
+ // If we represent a heap in an array/vector, the index of the parent (the
+ // subtree root) can be calculated as follows.
+ const int root = (target_subtree_child_index + 1) / 2 - 1;
+
+ // If the current child is smaller than the root, swap and continue heapifying
+ // the upper-level subtree.
+ if (root >= 0 && scored_terms.at(target_subtree_child_index).score <
+ scored_terms.at(root).score) {
+ std::swap(scored_terms.at(root),
+ scored_terms.at(target_subtree_child_index));
+ HeapifyTermUp(scored_terms, root);
+ }
+}
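+
+// Worked example of the index arithmetic above (illustrative): in an
+// array-backed heap, node 1 has children 1 * 2 + 1 = 3 and 1 * 2 + 2 = 4;
+// conversely, both children map back to the same parent, since
+// (3 + 1) / 2 - 1 == (4 + 1) / 2 - 1 == 1 under integer division.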
+
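+// Removes and returns the root (i.e. the minimum-score term) of the given
+// min-heap while maintaining the heap structure. Returns an invalid
+// TermMetadata as a sentinel if the heap is empty.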
+TermMetadata PopRootTerm(std::vector<TermMetadata>& scored_terms) {
+ if (scored_terms.empty()) {
+ // Return an invalid TermMetadata as a sentinel value.
+ return TermMetadata(/*content_in=*/"", /*hit_count_in=*/-1);
}
// Steps to extract root from heap:
// 1. copy out root
- ScoredDocumentHit root = scored_document_hits_heap->at(0);
- const size_t last_node_index = scored_document_hits_heap->size() - 1;
+ TermMetadata root = scored_terms.at(0);
+ const size_t last_node_index = scored_terms.size() - 1;
// 2. swap root and the last node
- std::swap(scored_document_hits_heap->at(0),
- scored_document_hits_heap->at(last_node_index));
+ std::swap(scored_terms.at(0), scored_terms.at(last_node_index));
// 3. remove last node
- scored_document_hits_heap->pop_back();
+ scored_terms.pop_back();
// 4. heapify root
- Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0,
- scored_document_hit_comparator);
+ HeapifyTermDown(scored_terms, /*target_subtree_root_index=*/0);
return root;
}
@@ -115,6 +160,42 @@ void BuildHeapInPlace(
}
}
+void PushToTermHeap(TermMetadata term, int number_to_return,
+ std::vector<TermMetadata>& scored_terms_heap) {
+ if (scored_terms_heap.size() < number_to_return) {
+ scored_terms_heap.push_back(std::move(term));
+ // We insert at end, so we should heapify bottom up.
+ HeapifyTermUp(scored_terms_heap, scored_terms_heap.size() - 1);
+ } else if (scored_terms_heap.at(0).score < term.score) {
+ scored_terms_heap.at(0) = std::move(term);
+ // We insert at root, so we should heapify top down.
+ HeapifyTermDown(scored_terms_heap, /*target_subtree_root_index=*/0);
+ }
+}
+
+libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap(
+ std::vector<ScoredDocumentHit>* scored_document_hits_heap,
+ const ScoredDocumentHitComparator& scored_document_hit_comparator) {
+ if (scored_document_hits_heap->empty()) {
+ // An invalid ScoredDocumentHit
+ return absl_ports::ResourceExhaustedError("Heap is empty");
+ }
+
+ // Steps to extract root from heap:
+ // 1. copy out root
+ ScoredDocumentHit root = scored_document_hits_heap->at(0);
+ const size_t last_node_index = scored_document_hits_heap->size() - 1;
+ // 2. swap root and the last node
+ std::swap(scored_document_hits_heap->at(0),
+ scored_document_hits_heap->at(last_node_index));
+ // 3. remove last node
+ scored_document_hits_heap->pop_back();
+ // 4. heapify root
+ Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0,
+ scored_document_hit_comparator);
+ return root;
+}
+
std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results,
const ScoredDocumentHitComparator& scored_document_hit_comparator) {
@@ -123,7 +204,8 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
num_results, static_cast<int>(scored_document_hits_heap->size()));
while (result_size-- > 0) {
libtextclassifier3::StatusOr<ScoredDocumentHit> next_best_document_hit_or =
- PopRoot(scored_document_hits_heap, scored_document_hit_comparator);
+ PopNextTopResultFromHeap(scored_document_hits_heap,
+ scored_document_hit_comparator);
if (next_best_document_hit_or.ok()) {
scored_document_hit_result.push_back(
std::move(next_best_document_hit_or).ValueOrDie());
@@ -134,5 +216,15 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
return scored_document_hit_result;
}
+std::vector<TermMetadata> PopAllTermsFromHeap(
+ std::vector<TermMetadata>& scored_terms_heap) {
+ std::vector<TermMetadata> top_term_result;
+ top_term_result.reserve(scored_terms_heap.size());
+ while (!scored_terms_heap.empty()) {
+ top_term_result.push_back(PopRootTerm(scored_terms_heap));
+ }
+ return top_term_result;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/ranker.h b/icing/scoring/ranker.h
index 785c133..bfe1077 100644
--- a/icing/scoring/ranker.h
+++ b/icing/scoring/ranker.h
@@ -17,6 +17,8 @@
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/term-metadata.h"
#include "icing/scoring/scored-document-hit.h"
// Provides functionality to get the top N results from an unsorted vector.
@@ -31,6 +33,17 @@ void BuildHeapInPlace(
std::vector<ScoredDocumentHit>* scored_document_hits,
const ScoredDocumentHitComparator& scored_document_hit_comparator);
+// Returns the single next top result (i.e. the current root element) from the
+// given heap and removes it from the heap. The heap structure will be
+// maintained.
+//
+// Returns:
+// The next top result element on success
+// RESOURCE_EXHAUSTED_ERROR if heap is empty
+libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap(
+ std::vector<ScoredDocumentHit>* scored_document_hits_heap,
+ const ScoredDocumentHitComparator& scored_document_hit_comparator);
+
// Returns the top num_results results from the given heap and remove those
// results from the heap. An empty vector will be returned if heap is empty.
//
@@ -39,6 +52,18 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results,
const ScoredDocumentHitComparator& scored_document_hit_comparator);
+// Pushes a term into the given heap, keeping at most number_to_return
+// elements. The heap is a min-heap so that we can skip some pushes by
+// comparing the new term against the root and only pushing if the new term is
+// greater. The time complexity of a single push is O(lg K), where K is
+// number_to_return.
+void PushToTermHeap(TermMetadata term, int number_to_return,
+ std::vector<TermMetadata>& scored_terms_heap);
+
+// Returns all terms from the given term heap, emptying the heap. Since the
+// heap is a min-heap, the output vector will be in increasing score order.
+std::vector<TermMetadata> PopAllTermsFromHeap(
+ std::vector<TermMetadata>& scored_terms_heap);
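+
+// A minimal usage sketch of the two functions above (illustrative only: the
+// terms and scores are made up, and the TermMetadata constructor is assumed to
+// follow the (content_in, hit_count_in) form used in ranker.cc):
+//
+//   std::vector<TermMetadata> heap;
+//   PushToTermHeap(TermMetadata("foo", 3), /*number_to_return=*/2, heap);
+//   PushToTermHeap(TermMetadata("bar", 7), /*number_to_return=*/2, heap);
+//   PushToTermHeap(TermMetadata("baz", 5), /*number_to_return=*/2, heap);
+//   // Only the top 2 terms remain; "foo" (score 3) was replaced at the third
+//   // push because it was the root of the min-heap.
+//   std::vector<TermMetadata> terms = PopAllTermsFromHeap(heap);
+//   // terms now holds {"baz", "bar"} in increasing score order.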
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/ranker_benchmark.cc b/icing/scoring/ranker_benchmark.cc
index 8983dd9..c2f13de 100644
--- a/icing/scoring/ranker_benchmark.cc
+++ b/icing/scoring/ranker_benchmark.cc
@@ -27,7 +27,7 @@ namespace {
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing/scoring:ranker_benchmark
//
-// $ blaze-bin/icing/scoring/ranker_benchmark --benchmarks=all
+// $ blaze-bin/icing/scoring/ranker_benchmark --benchmark_filter=all
// --benchmark_memory_usage
//
// Run on an Android device:
@@ -38,7 +38,7 @@ namespace {
// $ adb push blaze-bin/icing/scoring/ranker_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/ranker_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/ranker_benchmark --benchmark_filter=all
void BM_GetTopN(benchmark::State& state) {
int num_to_score = state.range(0);
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index c3ed40a..7cb5a95 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -49,7 +49,7 @@
// //icing/scoring:score-and-rank_benchmark
//
// $ blaze-bin/icing/scoring/score-and-rank_benchmark
-// --benchmarks=all --benchmark_memory_usage
+// --benchmark_filter=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -59,7 +59,8 @@
// $ adb push blaze-bin/icing/scoring/score-and-rank_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/score-and-rank_benchmark
+// --benchmark_filter=all
namespace icing {
namespace lib {
@@ -88,6 +89,18 @@ DocumentProto CreateEmailDocument(int id, int document_score,
.Build();
}
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark";
const std::string document_store_dir = base_dir + "/document_store";
@@ -96,26 +109,33 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
-
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(scoring_spec, document_store.get()));
-
+ ScoringProcessor::Create(scoring_spec, document_store.get(),
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -155,7 +175,6 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
PopTopResultsFromHeap(&scored_document_hits, /*num_results=*/20,
scored_document_hit_comparator);
}
-
// Clean up
document_store.reset();
schema_store.reset();
@@ -195,26 +214,34 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
-
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(scoring_spec, document_store.get()));
+ ScoringProcessor::Create(scoring_spec, document_store.get(),
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -295,25 +322,33 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
- filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
-
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem, document_store_dir, &clock,
- schema_store.get()));
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(scoring_spec, document_store.get()));
+ ScoringProcessor::Create(scoring_spec, document_store.get(),
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
int num_to_score = state.range(0);
int num_of_documents = state.range(1);
@@ -381,6 +416,127 @@ BENCHMARK(BM_ScoreAndRankDocumentHitsNoScoring)
->ArgPair(10000, 18000)
->ArgPair(10000, 20000);
+void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) {
+ const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark";
+ const std::string document_store_dir = base_dir + "/document_store";
+ const std::string schema_store_dir = base_dir + "/schema_store";
+
+ // Creates file directories
+ Filesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
+
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_store_dir, &clock));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(scoring_spec, document_store.get(),
+ schema_store.get(),
+ clock.GetSystemTimeMilliseconds()));
+
+ int num_to_score = state.range(0);
+ int num_of_documents = state.range(1);
+
+ std::mt19937 random_generator;
+ std::uniform_int_distribution<int> distribution(
+ 1, std::numeric_limits<int>::max());
+
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+
+ // Puts documents into document store
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos;
+ for (int i = 0; i < num_of_documents; i++) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store->Put(CreateEmailDocument(
+ /*id=*/i, /*document_score=*/1,
+ /*creation_timestamp_ms=*/1),
+ /*num_tokens=*/10));
+ DocHitInfoTermFrequencyPair doc_hit =
+ DocHitInfo(document_id, section_id_mask);
+ // Set five matches for term "foo" for each document hit.
+ doc_hit.UpdateSection(section_id, /*hit_term_frequency=*/5);
+ doc_hit_infos.push_back(doc_hit);
+ }
+
+ ScoredDocumentHitComparator scored_document_hit_comparator(
+ /*is_descending=*/true);
+
+ for (auto _ : state) {
+ // Creates a dummy DocHitInfoIterator with results. We pause the timer here
+ // so that the cost of copying test data is not included.
+ state.PauseTiming();
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+ // Create a query term iterator that assigns the document hits to term
+ // "foo".
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+ state.ResumeTiming();
+
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ scoring_processor->Score(std::move(doc_hit_info_iterator), num_to_score,
+ &query_term_iterators);
+
+ BuildHeapInPlace(&scored_document_hits, scored_document_hit_comparator);
+ // Ranks and gets the first page; 20 is a common page size.
+ std::vector<ScoredDocumentHit> results =
+ PopTopResultsFromHeap(&scored_document_hits, /*num_results=*/20,
+ scored_document_hit_comparator);
+ }
+
+ // Clean up
+ document_store.reset();
+ schema_store.reset();
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+}
+BENCHMARK(BM_ScoreAndRankDocumentHitsByRelevanceScoring)
+ // num_to_score, num_of_documents in document store
+ ->ArgPair(1000, 30000)
+ ->ArgPair(3000, 30000)
+ ->ArgPair(5000, 30000)
+ ->ArgPair(7000, 30000)
+ ->ArgPair(9000, 30000)
+ ->ArgPair(11000, 30000)
+ ->ArgPair(13000, 30000)
+ ->ArgPair(15000, 30000)
+ ->ArgPair(17000, 30000)
+ ->ArgPair(19000, 30000)
+ ->ArgPair(21000, 30000)
+ ->ArgPair(23000, 30000)
+ ->ArgPair(25000, 30000)
+ ->ArgPair(27000, 30000)
+ ->ArgPair(29000, 30000)
+ // Starting from this line, we're trying to see if num_of_documents affects
+ // performance.
+ ->ArgPair(10000, 10000)
+ ->ArgPair(10000, 12000)
+ ->ArgPair(10000, 14000)
+ ->ArgPair(10000, 16000)
+ ->ArgPair(10000, 18000)
+ ->ArgPair(10000, 20000);
+
} // namespace
} // namespace lib
diff --git a/icing/scoring/scored-document-hit.cc b/icing/scoring/scored-document-hit.cc
new file mode 100644
index 0000000..f519a16
--- /dev/null
+++ b/icing/scoring/scored-document-hit.cc
@@ -0,0 +1,30 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+JoinedScoredDocumentHit ScoredDocumentHit::Converter::operator()(
+ ScoredDocumentHit&& scored_doc_hit) const {
+ double final_score = scored_doc_hit.score();
+ return JoinedScoredDocumentHit(
+ final_score,
+ /*parent_scored_document_hit=*/std::move(scored_doc_hit),
+ /*child_scored_document_hits=*/{});
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scored-document-hit.h b/icing/scoring/scored-document-hit.h
index c2e51b8..5fc2f3a 100644
--- a/icing/scoring/scored-document-hit.h
+++ b/icing/scoring/scored-document-hit.h
@@ -24,11 +24,19 @@
namespace icing {
namespace lib {
+class JoinedScoredDocumentHit;
+
// A data class containing information about the document, hit sections, and a
// score. The score is calculated against both the document and the hit
// sections.
class ScoredDocumentHit {
public:
+ class Converter {
+ public:
+ JoinedScoredDocumentHit operator()(
+ ScoredDocumentHit&& scored_doc_hit) const;
+ };
+
ScoredDocumentHit(DocumentId document_id, SectionIdMask hit_section_id_mask,
double score)
: document_id_(document_id),
@@ -53,8 +61,8 @@ class ScoredDocumentHit {
double score_;
} __attribute__((packed));
-static_assert(sizeof(ScoredDocumentHit) == 14,
- "Size of ScoredDocHit should be 14");
+static_assert(sizeof(ScoredDocumentHit) == 20,
+ "Size of ScoredDocHit should be 20");
static_assert(icing_is_packed_pod<ScoredDocumentHit>::value, "go/icing-ubsan");
// A custom comparator for ScoredDocumentHit that determines which
@@ -71,13 +79,79 @@ class ScoredDocumentHitComparator {
bool operator()(const ScoredDocumentHit& lhs,
const ScoredDocumentHit& rhs) const {
- return is_descending_ == !(lhs < rhs);
+ // STL comparator requirement: comparing equal elements MUST return false.
+ // If we wrote `return is_descending_ == !(lhs < rhs)`:
+ // - When lhs == rhs, !(lhs < rhs) is true.
+ // - If is_descending_ is true, then we would return true for the equal case,
+ // violating the strict weak ordering (irreflexivity) that STL sorting and
+ // heap algorithms rely on.
+ if (is_descending_) {
+ return rhs < lhs;
+ }
+ return lhs < rhs;
}
private:
bool is_descending_;
};
+// A data class containing information about a composite document after
+// joining, including the final score, the parent ScoredDocumentHit, and a
+// vector of all child ScoredDocumentHits. The final score is calculated by the
+// strategy specified in the join spec/rank strategy. It could be an aggregated
+// score, the raw parent doc score, or anything else.
+//
+// ScoredDocumentHitsRanker may store ScoredDocumentHit or
+// JoinedScoredDocumentHit.
+// - We could've created a virtual class for them and had
+// ScoredDocumentHitsRanker use the abstract type.
+// - However, Icing lib caches ScoredDocumentHitsRanker (which contains a list
+// of (Joined)ScoredDocumentHits) in ResultState. Inheriting from the virtual
+// class would add 8 bytes for a vtable pointer to each class, increasing
+// memory usage by roughly 40% and 15% respectively (8 bytes on top of the
+// 20-byte packed ScoredDocumentHit, and on top of the ~52-byte packed
+// JoinedScoredDocumentHit, assuming a 24-byte std::vector).
+// - Also, since JoinedScoredDocumentHit is a super-set of ScoredDocumentHit,
+// let's avoid the common virtual class and instead implement a conversion
+// function (original type -> JoinedScoredDocumentHit) for each class, so
+// ScoredDocumentHitsRanker::PopNext can return a common type (i.e.
+// JoinedScoredDocumentHit).
+class JoinedScoredDocumentHit {
+ public:
+ class Converter {
+ public:
+ JoinedScoredDocumentHit operator()(
+ JoinedScoredDocumentHit&& scored_doc_hit) const {
+ return scored_doc_hit;
+ }
+ };
+
+ explicit JoinedScoredDocumentHit(
+ double final_score, ScoredDocumentHit parent_scored_document_hit,
+ std::vector<ScoredDocumentHit> child_scored_document_hits)
+ : final_score_(final_score),
+ parent_scored_document_hit_(std::move(parent_scored_document_hit)),
+ child_scored_document_hits_(std::move(child_scored_document_hits)) {}
+
+ bool operator<(const JoinedScoredDocumentHit& other) const {
+ if (final_score_ != other.final_score_) {
+ return final_score_ < other.final_score_;
+ }
+ return parent_scored_document_hit_ < other.parent_scored_document_hit_;
+ }
+
+ double final_score() const { return final_score_; }
+
+ const ScoredDocumentHit& parent_scored_document_hit() const {
+ return parent_scored_document_hit_;
+ }
+
+ const std::vector<ScoredDocumentHit>& child_scored_document_hits() const {
+ return child_scored_document_hits_;
+ }
+
+ private:
+ double final_score_;
+ ScoredDocumentHit parent_scored_document_hit_;
+ std::vector<ScoredDocumentHit> child_scored_document_hits_;
+} __attribute__((packed));
+
} // namespace lib
} // namespace icing
diff --git a/icing/scoring/scored-document-hit_test.cc b/icing/scoring/scored-document-hit_test.cc
new file mode 100644
index 0000000..cb9703b
--- /dev/null
+++ b/icing/scoring/scored-document-hit_test.cc
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scored-document-hit.h"
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::DoubleEq;
+using ::testing::IsEmpty;
+
+TEST(ScoredDocumentHitTest, ScoredDocumentHitConvertToJoinedScoredDocumentHit) {
+ ScoredDocumentHit::Converter converter;
+
+ double score = 2.0;
+ ScoredDocumentHit scored_document_hit(/*document_id=*/5,
+ /*section_id_mask=*/49, score);
+
+ JoinedScoredDocumentHit joined_scored_document_hit =
+ converter(ScoredDocumentHit(scored_document_hit));
+ EXPECT_THAT(joined_scored_document_hit.final_score(), DoubleEq(score));
+ EXPECT_THAT(joined_scored_document_hit.parent_scored_document_hit(),
+ EqualsScoredDocumentHit(scored_document_hit));
+ EXPECT_THAT(joined_scored_document_hit.child_scored_document_hits(),
+ IsEmpty());
+}
+
+TEST(ScoredDocumentHitTest,
+ JoinedScoredDocumentHitConvertToJoinedScoredDocumentHit) {
+ JoinedScoredDocumentHit::Converter converter;
+
+ ScoredDocumentHit parent_scored_document_hit(/*document_id=*/5,
+ /*section_id_mask=*/49,
+ /*score=*/1.0);
+ std::vector<ScoredDocumentHit> child_scored_document_hits{
+ ScoredDocumentHit(/*document_id=*/1,
+ /*section_id_mask=*/1,
+ /*score=*/2.0),
+ ScoredDocumentHit(/*document_id=*/2,
+ /*section_id_mask=*/2,
+ /*score=*/3.0),
+ ScoredDocumentHit(/*document_id=*/3,
+ /*section_id_mask=*/3,
+ /*score=*/4.0)};
+
+ JoinedScoredDocumentHit joined_scored_document_hit(
+ /*final_score=*/12345.6789, std::move(parent_scored_document_hit),
+ std::move(child_scored_document_hits));
+ EXPECT_THAT(converter(JoinedScoredDocumentHit(joined_scored_document_hit)),
+ EqualsJoinedScoredDocumentHit(joined_scored_document_hit));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scored-document-hits-ranker.h b/icing/scoring/scored-document-hits-ranker.h
new file mode 100644
index 0000000..9b76ce7
--- /dev/null
+++ b/icing/scoring/scored-document-hits-ranker.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_
+#define ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_
+
+#include "icing/scoring/scored-document-hit.h"
+
+namespace icing {
+namespace lib {
+
+// TODO(sungyc): re-evaluate other similar implementations (e.g. std::sort +
+// std::queue/std::vector). Also revisit the capacity shrinking
+// issue for PopNext().
+
+// ScoredDocumentHitsRanker is an interface class for ranking
+// ScoredDocumentHits.
+class ScoredDocumentHitsRanker {
+ public:
+ virtual ~ScoredDocumentHitsRanker() = default;
+
+ // Pops the next top JoinedScoredDocumentHit and returns it. Calling PopNext
+ // on an empty ranker is undefined, so the caller should check that the
+ // ranker is not empty before calling.
+ //
+ // Note: a ranker may store ScoredDocumentHit or JoinedScoredDocumentHit. We
+ // could make this interface a template, but since JoinedScoredDocumentHit is
+ // a superset of ScoredDocumentHit, we unify the return type of PopNext to
+ // the superset type JoinedScoredDocumentHit in order to keep it simple:
+ // rankers storing ScoredDocumentHit should convert to
+ // JoinedScoredDocumentHit before returning. This makes the implementation
+ // simpler, especially for ResultRetriever, which now only needs to deal with
+ // a single return format.
+ virtual JoinedScoredDocumentHit PopNext() = 0;
+
+ // Truncates the remaining ScoredDocumentHits to the given size. The best
+ // ScoredDocumentHits (according to the ranking policy) should be kept.
+ // If new_size is invalid (< 0), or greater than or equal to the number of
+ // remaining ScoredDocumentHits, then no action will be taken. Otherwise
+ // truncates the remaining ScoredDocumentHits to the given size.
+ virtual void TruncateHitsTo(int new_size) = 0;
+
+ virtual int size() const = 0;
+
+ virtual bool empty() const = 0;
+};
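+
+// A minimal usage sketch (illustrative; `ranker` is any concrete
+// implementation of this interface):
+//
+//   while (!ranker.empty()) {
+//     JoinedScoredDocumentHit hit = ranker.PopNext();
+//     // ... consume hit ...
+//   }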
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORED_DOCUMENT_HITS_RANKER_H_
diff --git a/icing/scoring/scorer-factory.cc b/icing/scoring/scorer-factory.cc
new file mode 100644
index 0000000..e56f10c
--- /dev/null
+++ b/icing/scoring/scorer-factory.cc
@@ -0,0 +1,242 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scorer-factory.h"
+
+#include <memory>
+#include <unordered_map>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/scoring/advanced_scoring/advanced-scorer.h"
+#include "icing/scoring/bm25f-calculator.h"
+#include "icing/scoring/scorer.h"
+#include "icing/scoring/section-weights.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+class DocumentScoreScorer : public Scorer {
+ public:
+ explicit DocumentScoreScorer(const DocumentStore* document_store,
+ double default_score)
+ : document_store_(*document_store), default_score_(default_score) {}
+
+ double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator*) override {
+ ICING_ASSIGN_OR_RETURN(
+ DocumentAssociatedScoreData score_data,
+ document_store_.GetDocumentAssociatedScoreData(hit_info.document_id()),
+ default_score_);
+
+ return static_cast<double>(score_data.document_score());
+ }
+
+ private:
+ const DocumentStore& document_store_;
+ double default_score_;
+};
+
+class DocumentCreationTimestampScorer : public Scorer {
+ public:
+ explicit DocumentCreationTimestampScorer(const DocumentStore* document_store,
+ double default_score)
+ : document_store_(*document_store), default_score_(default_score) {}
+
+ double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator*) override {
+ ICING_ASSIGN_OR_RETURN(
+ DocumentAssociatedScoreData score_data,
+ document_store_.GetDocumentAssociatedScoreData(hit_info.document_id()),
+ default_score_);
+
+ return static_cast<double>(score_data.creation_timestamp_ms());
+ }
+
+ private:
+ const DocumentStore& document_store_;
+ double default_score_;
+};
+
+class RelevanceScoreScorer : public Scorer {
+ public:
+ explicit RelevanceScoreScorer(
+ std::unique_ptr<SectionWeights> section_weights,
+ std::unique_ptr<Bm25fCalculator> bm25f_calculator, double default_score)
+ : section_weights_(std::move(section_weights)),
+ bm25f_calculator_(std::move(bm25f_calculator)),
+ default_score_(default_score) {}
+
+ void PrepareToScore(
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators) override {
+ bm25f_calculator_->PrepareToScore(query_term_iterators);
+ }
+
+ double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) override {
+ if (!query_it) {
+ return default_score_;
+ }
+
+ return static_cast<double>(
+ bm25f_calculator_->ComputeScore(query_it, hit_info, default_score_));
+ }
+
+ private:
+ std::unique_ptr<SectionWeights> section_weights_;
+ std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
+ double default_score_;
+};
+
+// A scorer which assigns scores to documents based on usage reports.
+class UsageScorer : public Scorer {
+ public:
+ UsageScorer(const DocumentStore* document_store,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ double default_score, int64_t current_time_ms)
+ : document_store_(*document_store),
+ ranking_strategy_(ranking_strategy),
+ default_score_(default_score),
+ current_time_ms_(current_time_ms) {}
+
+ double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator*) override {
+ std::optional<UsageStore::UsageScores> usage_scores =
+ document_store_.GetUsageScores(hit_info.document_id(),
+ current_time_ms_);
+ if (!usage_scores) {
+ // If there's no UsageScores entry present for this doc, then just
+ // treat it as a default instance.
+ usage_scores = UsageStore::UsageScores();
+ }
+
+ switch (ranking_strategy_) {
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
+ return usage_scores->usage_type1_count;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
+ return usage_scores->usage_type2_count;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
+ return usage_scores->usage_type3_count;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
+ return usage_scores->usage_type1_last_used_timestamp_s * 1000.0;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
+ return usage_scores->usage_type2_last_used_timestamp_s * 1000.0;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
+ return usage_scores->usage_type3_last_used_timestamp_s * 1000.0;
+ default:
+ // This shouldn't happen if this scorer is used correctly.
+ return default_score_;
+ }
+ }
+
+ private:
+ const DocumentStore& document_store_;
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy_;
+ double default_score_;
+ int64_t current_time_ms_;
+};
+
+// A special scorer which does nothing but assign the default score to each
+// document. This is used mainly when no scoring is required in a query.
+class NoScorer : public Scorer {
+ public:
+ explicit NoScorer(double default_score) : default_score_(default_score) {}
+
+ double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator*) override {
+ return default_score_;
+ }
+
+ private:
+ double default_score_;
+};
+
+namespace scorer_factory {
+
+libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
+ const ScoringSpecProto& scoring_spec, double default_score,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ int64_t current_time_ms, const JoinChildrenFetcher* join_children_fetcher) {
+ ICING_RETURN_ERROR_IF_NULL(document_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
+
+ if (!scoring_spec.advanced_scoring_expression().empty() &&
+ scoring_spec.rank_by() !=
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION) {
+ return absl_ports::InvalidArgumentError(
+ "Advanced scoring is not enabled, but the advanced scoring expression "
+ "is not empty!");
+ }
+
+ switch (scoring_spec.rank_by()) {
+ case ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE:
+ return std::make_unique<DocumentScoreScorer>(document_store,
+ default_score);
+ case ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP:
+ return std::make_unique<DocumentCreationTimestampScorer>(document_store,
+ default_score);
+ case ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE: {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store, scoring_spec));
+
+ auto bm25f_calculator = std::make_unique<Bm25fCalculator>(
+ document_store, section_weights.get(), current_time_ms);
+ return std::make_unique<RelevanceScoreScorer>(std::move(section_weights),
+ std::move(bm25f_calculator),
+ default_score);
+ }
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
+ return std::make_unique<UsageScorer>(document_store,
+ scoring_spec.rank_by(),
+ default_score, current_time_ms);
+ case ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION:
+ if (scoring_spec.advanced_scoring_expression().empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Advanced scoring is enabled, but the expression is empty!");
+ }
+ return AdvancedScorer::Create(scoring_spec, default_score, document_store,
+ schema_store, current_time_ms,
+ join_children_fetcher);
+ case ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE:
+ // Use join aggregate score to rank. Since the aggregation score is
+ // calculated by child documents after joining (in JoinProcessor), we can
+ // simply use NoScorer for parent documents.
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::NONE:
+ return std::make_unique<NoScorer>(default_score);
+ }
+}
+
+} // namespace scorer_factory
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scorer-factory.h b/icing/scoring/scorer-factory.h
new file mode 100644
index 0000000..659bebd
--- /dev/null
+++ b/icing/scoring/scorer-factory.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORER_FACTORY_H_
+#define ICING_SCORING_SCORER_FACTORY_H_
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/scoring/scorer.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+namespace scorer_factory {
+
+// Factory function to create a Scorer. It does not take ownership of any
+// input components (e.g. DocumentStore, SchemaStore), and all pointers must
+// refer to valid
+// objects that outlive the created Scorer instance. The default score will be
+// returned only when the scorer fails to find or calculate a score for the
+// document.
+//
+// Returns:
+// A Scorer on success
+// FAILED_PRECONDITION on any null pointer input
+// INVALID_ARGUMENT if fails to create an instance
+libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
+ const ScoringSpecProto& scoring_spec, double default_score,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
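+
+// A minimal usage sketch (illustrative; error handling is elided and the
+// surrounding stores/clock are assumed to be set up as in scorer_test.cc):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<Scorer> scorer,
+//       scorer_factory::Create(scoring_spec, /*default_score=*/0.0,
+//                              document_store.get(), schema_store.get(),
+//                              clock.GetSystemTimeMilliseconds()));
+//   double score = scorer->GetScore(doc_hit_info);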
+
+} // namespace scorer_factory
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORER_FACTORY_H_
diff --git a/icing/scoring/scorer-test-utils.h b/icing/scoring/scorer-test-utils.h
new file mode 100644
index 0000000..e8ca853
--- /dev/null
+++ b/icing/scoring/scorer-test-utils.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORER_TEST_UTILS_H_
+#define ICING_SCORING_SCORER_TEST_UTILS_H_
+
+#include "icing/proto/scoring.pb.h"
+
+namespace icing {
+namespace lib {
+
+enum class ScorerTestingMode { kNormal, kAdvanced };
+
+inline ScoringSpecProto CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ ScorerTestingMode testing_mode) {
+ ScoringSpecProto scoring_spec;
+ if (testing_mode != ScorerTestingMode::kAdvanced) {
+ scoring_spec.set_rank_by(ranking_strategy);
+ return scoring_spec;
+ }
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ switch (ranking_strategy) {
+ case ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE:
+ scoring_spec.set_advanced_scoring_expression("this.documentScore()");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP:
+ scoring_spec.set_advanced_scoring_expression("this.creationTimestamp()");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
+ scoring_spec.set_advanced_scoring_expression("this.usageCount(1)");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
+ scoring_spec.set_advanced_scoring_expression("this.usageCount(2)");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
+ scoring_spec.set_advanced_scoring_expression("this.usageCount(3)");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
+ scoring_spec.set_advanced_scoring_expression(
+ "this.usageLastUsedTimestamp(1)");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
+ scoring_spec.set_advanced_scoring_expression(
+ "this.usageLastUsedTimestamp(2)");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
+ scoring_spec.set_advanced_scoring_expression(
+ "this.usageLastUsedTimestamp(3)");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE:
+ scoring_spec.set_advanced_scoring_expression("this.relevanceScore()");
+ return scoring_spec;
+ case ScoringSpecProto::RankingStrategy::NONE:
+ case ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE:
+ case ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION:
+ scoring_spec.set_rank_by(ranking_strategy);
+ return scoring_spec;
+ }
+}
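+
+// For example (illustrative), calling
+//   CreateScoringSpecForRankingStrategy(
+//       ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+//       ScorerTestingMode::kAdvanced)
+// returns a spec with rank_by set to ADVANCED_SCORING_EXPRESSION and the
+// expression "this.documentScore()", so the same test body can exercise both
+// the normal and the advanced scorer implementations.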
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORER_TEST_UTILS_H_
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
deleted file mode 100644
index ab5308c..0000000
--- a/icing/scoring/scorer.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/scoring/scorer.h"
-
-#include <memory>
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/proto/scoring.pb.h"
-#include "icing/store/document-associated-score-data.h"
-#include "icing/store/document-id.h"
-#include "icing/store/document-store.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-class DocumentScoreScorer : public Scorer {
- public:
- explicit DocumentScoreScorer(const DocumentStore* document_store,
- double default_score)
- : document_store_(*document_store), default_score_(default_score) {}
-
- double GetScore(DocumentId document_id) override {
- ICING_ASSIGN_OR_RETURN(
- DocumentAssociatedScoreData score_data,
- document_store_.GetDocumentAssociatedScoreData(document_id),
- default_score_);
-
- return static_cast<double>(score_data.document_score());
- }
-
- private:
- const DocumentStore& document_store_;
- double default_score_;
-};
-
-class DocumentCreationTimestampScorer : public Scorer {
- public:
- explicit DocumentCreationTimestampScorer(const DocumentStore* document_store,
- double default_score)
- : document_store_(*document_store), default_score_(default_score) {}
-
- double GetScore(DocumentId document_id) override {
- ICING_ASSIGN_OR_RETURN(
- DocumentAssociatedScoreData score_data,
- document_store_.GetDocumentAssociatedScoreData(document_id),
- default_score_);
-
- return static_cast<double>(score_data.creation_timestamp_ms());
- }
-
- private:
- const DocumentStore& document_store_;
- double default_score_;
-};
-
-// A special scorer which does nothing but assigns the default score to each
-// document. This is used especially when no scoring is required in a query.
-class NoScorer : public Scorer {
- public:
- explicit NoScorer(double default_score) : default_score_(default_score) {}
-
- double GetScore(DocumentId document_id) override { return default_score_; }
-
- private:
- double default_score_;
-};
-
-libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Scorer::Create(
- ScoringSpecProto::RankingStrategy::Code rank_by, double default_score,
- const DocumentStore* document_store) {
- ICING_RETURN_ERROR_IF_NULL(document_store);
-
- switch (rank_by) {
- case ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE:
- return std::make_unique<DocumentScoreScorer>(document_store,
- default_score);
- case ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP:
- return std::make_unique<DocumentCreationTimestampScorer>(document_store,
- default_score);
- case ScoringSpecProto::RankingStrategy::NONE:
- return std::make_unique<NoScorer>(default_score);
- }
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/scoring/scorer.h b/icing/scoring/scorer.h
index 55c6b5c..ec48502 100644
--- a/icing/scoring/scorer.h
+++ b/icing/scoring/scorer.h
@@ -16,11 +16,11 @@
#define ICING_SCORING_SCORER_H_
#include <memory>
+#include <unordered_map>
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/scoring.pb.h"
-#include "icing/store/document-id.h"
-#include "icing/store/document-store.h"
namespace icing {
namespace lib {
@@ -30,34 +30,30 @@ class Scorer {
public:
virtual ~Scorer() = default;
- // Factory function to create a Scorer which does not take ownership of any
- // input components (DocumentStore), and all pointers must refer to valid
- // objects that outlive the created Scorer instance. The default score will be
- // returned only when the scorer fails to find or calculate a score for the
- // document.
- //
- // Returns:
- // A Scorer on success
- // FAILED_PRECONDITION on any null pointer input
- // INVALID_ARGUMENT if fails to create an instance
- static libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
- ScoringSpecProto::RankingStrategy::Code rank_by, double default_score,
- const DocumentStore* document_store);
-
// Returns a non-negative score of a document. The score can be a
// document-associated score which comes from the DocumentProto directly, an
- // accumulated score, or even an inferred score. If it fails to find or
- // calculate a score, the user-provided default score will be returned.
+ // accumulated score, a relevance score, or even an inferred score. If it
+ // fails to find or calculate a score, the user-provided default score will be
+ // returned.
//
// Some examples of possible scores:
// 1. Document-associated scores: document score, creation timestamp score.
// 2. Accumulated scores: usage count score.
// 3. Inferred scores: a score calculated by a machine learning model.
+ // 4. Relevance score: computed as a BM25F score.
//
// NOTE: This method is performance-sensitive as it's called for every
// potential result document. We're trying to avoid returning StatusOr<double>
// to save a little more time and memory.
- virtual double GetScore(DocumentId document_id) = 0;
+ virtual double GetScore(const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it = nullptr) = 0;
+
+ // Currently only overridden by the RelevanceScoreScorer.
+ // NOTE: the query_term_iterators map must outlive the scorer; see
+ // bm25f-calculator for more details.
+ virtual void PrepareToScore(
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators) {}
};
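+
+// For relevance scoring, a minimal sketch of the expected call order
+// (illustrative; `hits` and `query_iterator` are hypothetical stand-ins for
+// the caller's data):
+//
+//   scorer->PrepareToScore(&query_term_iterators);
+//   for (const DocHitInfo& hit_info : hits) {
+//     double score = scorer->GetScore(hit_info, query_iterator);
+//   }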
} // namespace lib
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 4dda603..5194c7f 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -21,10 +21,16 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
+#include "icing/scoring/scorer-factory.h"
+#include "icing/scoring/scorer-test-utils.h"
+#include "icing/scoring/section-weights.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
@@ -36,9 +42,8 @@ namespace lib {
namespace {
using ::testing::Eq;
-using ::testing::Test;
-class ScorerTest : public Test {
+class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
protected:
ScorerTest()
: test_dir_(GetTestTempDir() + "/icing"),
@@ -54,23 +59,34 @@ class ScorerTest : public Test {
fake_clock2_.SetSystemTimeMilliseconds(1572200000000);
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock1_));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock1_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
- SchemaProto test_email_schema;
- auto type_config = test_email_schema.add_types();
- type_config->set_schema_type("email");
- auto subject = type_config->add_properties();
- subject->set_property_name("subject");
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ SchemaProto test_email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -81,10 +97,16 @@ class ScorerTest : public Test {
DocumentStore* document_store() { return document_store_.get(); }
+ SchemaStore* schema_store() { return schema_store_.get(); }
+
const FakeClock& fake_clock1() { return fake_clock1_; }
const FakeClock& fake_clock2() { return fake_clock2_; }
+ void SetFakeClock1Time(int64_t new_time) {
+ fake_clock1_.SetSystemTimeMilliseconds(new_time);
+ }
+
private:
const std::string test_dir_;
const std::string doc_store_dir_;
@@ -96,24 +118,53 @@ class ScorerTest : public Test {
FakeClock fake_clock2_;
};
-TEST_F(ScorerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
- /*default_score=*/0, /*document_store=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+TEST_P(ScorerTest, CreationWithNullDocumentStoreShouldFail) {
+ EXPECT_THAT(
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
+ /*default_score=*/0, /*document_store=*/nullptr, schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(ScorerTest, ShouldGetDefaultScore) {
+TEST_P(ScorerTest, CreationWithNullSchemaStoreShouldFail) {
+ EXPECT_THAT(
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
+ /*default_score=*/0, document_store(),
+ /*schema_store=*/nullptr, fake_clock1().GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_P(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
- Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
- /*default_score=*/10, document_store()));
-
- DocumentId non_existing_document_id = 1;
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
+ /*default_score=*/10, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ // Non-existent document id.
+ DocHitInfo docHitInfo = DocHitInfo(/*document_id_in=*/1);
// The caller-provided default score is returned
- EXPECT_THAT(scorer->GetScore(non_existing_document_id), Eq(10));
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
}
-TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) {
+TEST_P(ScorerTest, ShouldGetDefaultDocumentScore) {
// Creates a test document with the default document score 0
DocumentProto test_document =
DocumentBuilder()
@@ -127,13 +178,17 @@ TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) {
document_store()->Put(test_document));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
- Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
- /*default_score=*/10, document_store()));
-
- EXPECT_THAT(scorer->GetScore(document_id), Eq(0));
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
+ /*default_score=*/10, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0));
}
-TEST_F(ScorerTest, ShouldGetCorrectDocumentScore) {
+TEST_P(ScorerTest, ShouldGetCorrectDocumentScore) {
// Creates a test document with document score 5
DocumentProto test_document =
DocumentBuilder()
@@ -148,13 +203,44 @@ TEST_F(ScorerTest, ShouldGetCorrectDocumentScore) {
document_store()->Put(test_document));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
- Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
- /*default_score=*/0, document_store()));
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
+}
+
+// See scoring-processor_test.cc and icing-search-engine_test.cc for more
+// thorough BM25F scoring tests.
+TEST_P(ScorerTest, QueryIteratorNullRelevanceScoreShouldReturnDefaultScore) {
+ // Creates a test document with document score 5
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetScore(5)
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock2().GetSystemTimeMilliseconds())
+ .Build();
- EXPECT_THAT(scorer->GetScore(document_id), Eq(5));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam()),
+ /*default_score=*/10, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
}
-TEST_F(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
+TEST_P(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
// Creates test_document1 with fake timestamp1
DocumentProto test_document1 =
DocumentBuilder()
@@ -178,34 +264,457 @@ TEST_F(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
document_store()->Put(test_document2));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
- Scorer::Create(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
- /*default_score=*/0, document_store()));
-
- EXPECT_THAT(scorer->GetScore(document_id1),
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ DocHitInfo docHitInfo1 = DocHitInfo(document_id1);
+ DocHitInfo docHitInfo2 = DocHitInfo(document_id2);
+ EXPECT_THAT(scorer->GetScore(docHitInfo1),
Eq(fake_clock1().GetSystemTimeMilliseconds()));
- EXPECT_THAT(scorer->GetScore(document_id2),
+ EXPECT_THAT(scorer->GetScore(docHitInfo2),
Eq(fake_clock2().GetSystemTimeMilliseconds()));
}
-TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
+TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType1) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report a type1 usage.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
+
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+}
+
+TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType2) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Scorer> scorer,
- Scorer::Create(ScoringSpecProto::RankingStrategy::NONE,
- /*default_score=*/3, document_store()));
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report a type2 usage.
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2));
+
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+}
+
+TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType3) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
+ /*default_score=*/0, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report a type3 usage.
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3));
+
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1));
+}
+
+TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE2_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE3_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ UsageReport usage_report_type1_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1000));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report usage with timestamp = 5000ms, score should be updated.
+ UsageReport usage_report_type1_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report usage with timestamp = 3000ms, score should not be updated.
+ UsageReport usage_report_type1_time3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+}
+
+TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE2_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE3_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ UsageReport usage_report_type2_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1000));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report usage with timestamp = 5000ms, score should be updated.
+ UsageReport usage_report_type2_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ // Report usage with timestamp = 3000ms, score should not be updated.
+ UsageReport usage_report_type2_time3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+}
+
+TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE2_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE3_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
+
+ UsageReport usage_report_type3_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1000));
+
+ // Report usage with timestamp = 5000ms, score should be updated.
+ UsageReport usage_report_type3_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
+
+ // Report usage with timestamp = 3000ms, score should not be updated.
+ UsageReport usage_report_type3_time3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
+}
- EXPECT_THAT(scorer->GetScore(/*document_id=*/0), Eq(3));
- EXPECT_THAT(scorer->GetScore(/*document_id=*/1), Eq(3));
- EXPECT_THAT(scorer->GetScore(/*document_id=*/2), Eq(3));
+TEST_P(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer,
+ scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::NONE, GetParam()),
+ /*default_score=*/3, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ DocHitInfo docHitInfo1 = DocHitInfo(/*document_id_in=*/0);
+ DocHitInfo docHitInfo2 = DocHitInfo(/*document_id_in=*/1);
+ DocHitInfo docHitInfo3 = DocHitInfo(/*document_id_in=*/2);
+ EXPECT_THAT(scorer->GetScore(docHitInfo1), Eq(3));
+ EXPECT_THAT(scorer->GetScore(docHitInfo2), Eq(3));
+ EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(3));
ICING_ASSERT_OK_AND_ASSIGN(
- scorer, Scorer::Create(ScoringSpecProto::RankingStrategy::NONE,
- /*default_score=*/111, document_store()));
+ scorer, scorer_factory::Create(
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::NONE, GetParam()),
+ /*default_score=*/111, document_store(), schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+
+ docHitInfo1 = DocHitInfo(/*document_id_in=*/4);
+ docHitInfo2 = DocHitInfo(/*document_id_in=*/5);
+ docHitInfo3 = DocHitInfo(/*document_id_in=*/6);
+ EXPECT_THAT(scorer->GetScore(docHitInfo1), Eq(111));
+ EXPECT_THAT(scorer->GetScore(docHitInfo2), Eq(111));
+ EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(111));
+}
- EXPECT_THAT(scorer->GetScore(/*document_id=*/4), Eq(111));
- EXPECT_THAT(scorer->GetScore(/*document_id=*/5), Eq(111));
- EXPECT_THAT(scorer->GetScore(/*document_id=*/6), Eq(111));
+TEST_P(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ scorer_factory::Create(CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::
+ USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ GetParam()),
+ /*default_score=*/0, document_store(),
+ schema_store(),
+ fake_clock1().GetSystemTimeMilliseconds()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ // Create usage report for the maximum allowable timestamp.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1",
+ /*timestamp_ms=*/std::numeric_limits<uint32_t>::max() * 1000.0,
+ UsageReport::USAGE_TYPE1);
+
+ double max_int_usage_timestamp_score =
+ std::numeric_limits<uint32_t>::max() * 1000.0;
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(max_int_usage_timestamp_score));
}
+INSTANTIATE_TEST_SUITE_P(ScorerTest, ScorerTest,
+ testing::Values(ScorerTestingMode::kNormal,
+ ScorerTestingMode::kAdvanced));
+
} // namespace
} // namespace lib
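CreateScoringSpecForRankingStrategy comes from icing/scoring/scorer-test-utils.h, which this snapshot does not include. The sketch below is only an assumption about its likely shape, inferred from the call sites above; the kAdvanced branch is left as a comment because the real mapping is not visible in this patch.

// Hypothetical sketch; the real helper lives in scorer-test-utils.h.
ScoringSpecProto CreateScoringSpecForRankingStrategy(
    ScoringSpecProto::RankingStrategy::Code rank_by, ScorerTestingMode mode) {
  ScoringSpecProto spec;
  spec.set_rank_by(rank_by);
  // In ScorerTestingMode::kAdvanced the helper presumably expresses the same
  // strategy through the advanced scoring path instead of the plain rank_by
  // field; that mapping is not shown in this patch.
  return spec;
}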
diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc
index 0933094..b827bd8 100644
--- a/icing/scoring/scoring-processor.cc
+++ b/icing/scoring/scoring-processor.cc
@@ -14,7 +14,10 @@
#include "icing/scoring/scoring-processor.h"
+#include <limits>
#include <memory>
+#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -22,8 +25,10 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/scoring/ranker.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scorer-factory.h"
#include "icing/scoring/scorer.h"
#include "icing/store/document-store.h"
#include "icing/util/status-macros.h"
@@ -39,28 +44,36 @@ constexpr double kDefaultScoreInAscendingOrder =
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
- const DocumentStore* document_store) {
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store,
+ int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
bool is_descending_order =
scoring_spec.order_by() == ScoringSpecProto::Order::DESC;
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<Scorer> scorer,
- Scorer::Create(scoring_spec.rank_by(),
- is_descending_order ? kDefaultScoreInDescendingOrder
- : kDefaultScoreInAscendingOrder,
- document_store));
-
+ scorer_factory::Create(scoring_spec,
+ is_descending_order
+ ? kDefaultScoreInDescendingOrder
+ : kDefaultScoreInAscendingOrder,
+ document_store, schema_store, current_time_ms,
+ join_children_fetcher));
// Using `new` to access a non-public constructor.
return std::unique_ptr<ScoringProcessor>(
new ScoringProcessor(std::move(scorer)));
}
std::vector<ScoredDocumentHit> ScoringProcessor::Score(
- std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
- int num_to_score) {
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator, int num_to_score,
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators,
+ QueryStatsProto::SearchStats* search_stats) {
std::vector<ScoredDocumentHit> scored_document_hits;
+ scorer_->PrepareToScore(query_term_iterators);
while (doc_hit_info_iterator->Advance().ok() && num_to_score-- > 0) {
const DocHitInfo& doc_hit_info = doc_hit_info_iterator->doc_hit_info();
@@ -69,11 +82,24 @@ std::vector<ScoredDocumentHit> ScoringProcessor::Score(
// The final score of the doc_hit_info = score of doc * demotion factor of
// hit.
double score =
- scorer_->GetScore(doc_hit_info.document_id()) * hit_demotion_factor;
+ scorer_->GetScore(doc_hit_info, doc_hit_info_iterator.get()) *
+ hit_demotion_factor;
scored_document_hits.emplace_back(
doc_hit_info.document_id(), doc_hit_info.hit_section_ids_mask(), score);
}
+ if (search_stats != nullptr) {
+ search_stats->set_num_documents_scored(scored_document_hits.size());
+ DocHitInfoIterator::CallStats iterator_call_stats =
+ doc_hit_info_iterator->GetCallStats();
+ search_stats->set_num_fetched_hits_lite_index(
+ iterator_call_stats.num_leaf_advance_calls_lite_index);
+ search_stats->set_num_fetched_hits_main_index(
+ iterator_call_stats.num_leaf_advance_calls_main_index);
+ search_stats->set_num_fetched_hits_integer_index(
+ iterator_call_stats.num_leaf_advance_calls_integer_index);
+ }
+
return scored_document_hits;
}
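A short sketch of a caller reading the new stats out-parameter, assuming scoring_processor, doc_hit_info_iterator, and query_term_iterators already exist (the names are illustrative, not from this patch):

QueryStatsProto::SearchStats search_stats;
std::vector<ScoredDocumentHit> hits = scoring_processor->Score(
    std::move(doc_hit_info_iterator), /*num_to_score=*/10,
    &query_term_iterators, &search_stats);
// After Score() returns, the stats carry how many documents were scored and
// how many hits each index fetched while the iterator advanced.
int32_t lite_hits = search_stats.num_fetched_hits_lite_index();
int32_t main_hits = search_stats.num_fetched_hits_main_index();
int32_t integer_hits = search_stats.num_fetched_hits_integer_index();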
diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h
index 60c3b32..8634a22 100644
--- a/icing/scoring/scoring-processor.h
+++ b/icing/scoring/scoring-processor.h
@@ -15,13 +15,19 @@
#ifndef ICING_SCORING_SCORING_PROCESSOR_H_
#define ICING_SCORING_SCORING_PROCESSOR_H_
+#include <cstdint>
#include <memory>
+#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/scoring/scorer.h"
#include "icing/store/document-store.h"
@@ -40,19 +46,24 @@ class ScoringProcessor {
// A ScoringProcessor on success
// FAILED_PRECONDITION on any null pointer input
static libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> Create(
- const ScoringSpecProto& scoring_spec,
- const DocumentStore* document_store);
+ const ScoringSpecProto& scoring_spec, const DocumentStore* document_store,
+ const SchemaStore* schema_store, int64_t current_time_ms,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
// Assigns scores to DocHitInfos from the given DocHitInfoIterator and returns
// a vector of ScoredDocumentHits. The size of results is no more than
// num_to_score. The order of results is the same as DocHitInfos from
// DocHitInfoIterator.
//
- // NOTE: if the scoring spec doesn't require a scoring strategy, all
+ // If necessary, query_term_iterators is used to compute the BM25F relevance
+ // score. NOTE: if the scoring spec doesn't require a scoring strategy, all
// ScoredDocumentHits will be assigned a default score 0.
std::vector<ScoredDocumentHit> Score(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
- int num_to_score);
+ int num_to_score,
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators = nullptr,
+ QueryStatsProto::SearchStats* search_stats = nullptr);
private:
explicit ScoringProcessor(std::unique_ptr<Scorer> scorer)
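Putting the two new signatures together, a minimal usage sketch, assuming scoring_spec, document_store, schema_store, a clock, and an iterator already exist; join_children_fetcher is left at its nullptr default, as for any non-join query:

ICING_ASSIGN_OR_RETURN(
    std::unique_ptr<ScoringProcessor> scoring_processor,
    ScoringProcessor::Create(scoring_spec, document_store, schema_store,
                             clock.GetSystemTimeMilliseconds()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
    query_term_iterators;  // Only needed for RELEVANCE_SCORE (BM25F).
std::vector<ScoredDocumentHit> hits = scoring_processor->Score(
    std::move(doc_hit_info_iterator), /*num_to_score=*/10,
    &query_term_iterators);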
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index b93bf1a..deddff8 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -24,6 +24,10 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/scoring/scorer-test-utils.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
@@ -33,11 +37,13 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Gt;
using ::testing::IsEmpty;
using ::testing::SizeIs;
-using ::testing::Test;
-class ScoringProcessorTest : public Test {
+class ScoringProcessorTest
+ : public ::testing::TestWithParam<ScorerTestingMode> {
protected:
ScoringProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
@@ -50,24 +56,47 @@ class ScoringProcessorTest : public Test {
filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
- SchemaProto test_email_schema;
- auto type_config = test_email_schema.add_types();
- type_config->set_schema_type("email");
- auto subject = type_config->add_properties();
- subject->set_property_name("subject");
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ SchemaProto test_email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -78,6 +107,10 @@ class ScoringProcessorTest : public Test {
DocumentStore* document_store() { return document_store_.get(); }
+ SchemaStore* schema_store() { return schema_store_.get(); }
+
+ const FakeClock& fake_clock() const { return fake_clock_; }
+
private:
const std::string test_dir_;
const std::string doc_store_dir_;
@@ -120,38 +153,84 @@ CreateAndInsertsDocumentsWithScores(DocumentStore* document_store,
return std::pair(doc_hit_infos, scored_document_hits);
}
-TEST_F(ScoringProcessorTest, CreationWithNullPointerShouldFail) {
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+TypePropertyWeights CreateTypePropertyWeights(
+ std::string schema_type, std::vector<PropertyWeight> property_weights) {
+ TypePropertyWeights type_property_weights;
+ type_property_weights.set_schema_type(std::move(schema_type));
+ type_property_weights.mutable_property_weights()->Reserve(
+ property_weights.size());
+
+ for (PropertyWeight& property_weight : property_weights) {
+ *type_property_weights.add_property_weights() = std::move(property_weight);
+ }
+
+ return type_property_weights;
+}
+
+PropertyWeight CreatePropertyWeight(std::string path, double weight) {
+ PropertyWeight property_weight;
+ property_weight.set_path(std::move(path));
+ property_weight.set_weight(weight);
+ return property_weight;
+}
+
+TEST_F(ScoringProcessorTest, CreationWithNullDocumentStoreShouldFail) {
ScoringSpecProto spec_proto;
- EXPECT_THAT(ScoringProcessor::Create(spec_proto, /*document_store=*/nullptr),
+ EXPECT_THAT(ScoringProcessor::Create(
+ spec_proto, /*document_store=*/nullptr, schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(ScoringProcessorTest, ShouldCreateInstance) {
+TEST_F(ScoringProcessorTest, CreationWithNullSchemaStoreShouldFail) {
ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- ICING_EXPECT_OK(ScoringProcessor::Create(spec_proto, document_store()));
+ EXPECT_THAT(
+ ScoringProcessor::Create(spec_proto, document_store(),
+ /*schema_store=*/nullptr,
+ fake_clock().GetSystemTimeMilliseconds()),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
+TEST_P(ScoringProcessorTest, ShouldCreateInstance) {
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
+ ICING_EXPECT_OK(
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+}
+
+TEST_P(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
// Creates an empty DocHitInfoIterator
std::vector<DocHitInfo> doc_hit_infos = {};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/5),
IsEmpty());
}
-TEST_F(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
+TEST_P(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
// Sets up documents
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
@@ -164,13 +243,14 @@ TEST_F(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/-1),
@@ -183,7 +263,7 @@ TEST_F(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
IsEmpty());
}
-TEST_F(ScoringProcessorTest, ShouldRespectNumToScore) {
+TEST_P(ScoringProcessorTest, ShouldRespectNumToScore) {
// Sets up documents
ICING_ASSERT_OK_AND_ASSIGN(
auto doc_hit_result_pair,
@@ -194,13 +274,14 @@ TEST_F(ScoringProcessorTest, ShouldRespectNumToScore) {
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/2),
@@ -213,7 +294,7 @@ TEST_F(ScoringProcessorTest, ShouldRespectNumToScore) {
SizeIs(3));
}
-TEST_F(ScoringProcessorTest, ShouldScoreByDocumentScore) {
+TEST_P(ScoringProcessorTest, ShouldScoreByDocumentScore) {
// Creates input doc_hit_infos and expected output scored_document_hits
ICING_ASSERT_OK_AND_ASSIGN(
auto doc_hit_result_pair,
@@ -226,13 +307,14 @@ TEST_F(ScoringProcessorTest, ShouldScoreByDocumentScore) {
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -241,7 +323,574 @@ TEST_F(ScoringProcessorTest, ShouldScoreByDocumentScore) {
EqualsScoredDocumentHit(scored_document_hits.at(2))));
}
-TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_DocumentsWithDifferentLength) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/10));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store()->Put(document2, /*num_tokens=*/100));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store()->Put(document3, /*num_tokens=*/50));
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
+ doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+ DocHitInfoTermFrequencyPair doc_hit_info3 = DocHitInfo(document_id3);
+ doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = UINT64_C(1) << section_id;
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {
+ doc_hit_info1, doc_hit_info2, doc_hit_info3};
+
+ // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+ // Since the three documents all contain the query term "foo" exactly once,
+ // the document's length determines the final score. Documents shorter than
+ // the average corpus length are slightly boosted.
+ ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
+ /*score=*/0.187114);
+ ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
+ /*score=*/0.084904);
+ ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
+ /*score=*/0.121896);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+ EqualsScoredDocumentHit(expected_scored_doc_hit2),
+ EqualsScoredDocumentHit(expected_scored_doc_hit3)));
+}
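The expected scores above follow the BM25 family of ranking functions. For reference, the standard per-term BM25 contribution is

\[
  \mathrm{score}(D, t) = \mathrm{IDF}(t)\cdot
  \frac{f(t, D)\,(k_1 + 1)}
       {f(t, D) + k_1\left(1 - b + b\,\frac{|D|}{\mathrm{avgdl}}\right)}
\]

where f(t, D) is the frequency of term t in document D, |D| is the document length in tokens, and avgdl is the average document length in the corpus. The (1 - b + b |D| / avgdl) factor in the denominator is what gives shorter-than-average documents the slight boost described above. The exact parameterization icing uses, including BM25F's per-section weighting, lives in bm25f-calculator and is not part of this patch.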
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_DocumentsWithSameLength) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/10));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store()->Put(document2, /*num_tokens=*/10));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store()->Put(document3, /*num_tokens=*/10));
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
+ doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+ DocHitInfoTermFrequencyPair doc_hit_info3 = DocHitInfo(document_id3);
+ doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = UINT64_C(1) << section_id;
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {
+ doc_hit_info1, doc_hit_info2, doc_hit_info3};
+
+ // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+ // Since the three documents all contain the query term "foo" exactly once
+ // and they have the same length, they will have the same BM25F score.
+ ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
+ /*score=*/0.118455);
+ ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
+ /*score=*/0.118455);
+ ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
+ /*score=*/0.118455);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+ EqualsScoredDocumentHit(expected_scored_doc_hit2),
+ EqualsScoredDocumentHit(expected_scored_doc_hit3)));
+}
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_DocumentsWithDifferentQueryFrequency) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/10));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store()->Put(document2, /*num_tokens=*/10));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id3,
+ document_store()->Put(document3, /*num_tokens=*/10));
+
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ // Document 1 contains the query term "foo" 5 times
+ doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/5);
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
+ // Document 2 contains the query term "foo" 1 time
+ doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+ DocHitInfoTermFrequencyPair doc_hit_info3 = DocHitInfo(document_id3);
+ // Document 3 contains the query term "foo" 3 times
+ doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
+ doc_hit_info3.UpdateSection(/*section_id*/ 1, /*hit_term_frequency=*/2);
+
+ SectionIdMask section_id_mask1 = 0b00000001;
+ SectionIdMask section_id_mask2 = 0b00000001;
+ SectionIdMask section_id_mask3 = 0b00000011;
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {
+ doc_hit_info1, doc_hit_info2, doc_hit_info3};
+
+ // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+ // Since the three documents all have the same length, the score is decided by
+ // the frequency of the query term "foo".
+ ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1,
+ /*score=*/0.226674);
+ ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask2,
+ /*score=*/0.118455);
+ ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask3,
+ /*score=*/0.196720);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+ EqualsScoredDocumentHit(expected_scored_doc_hit2),
+ EqualsScoredDocumentHit(expected_scored_doc_hit3)));
+}
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_HitTermWithZeroFrequency) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/10));
+
+ // Document 1 contains the term "foo" 0 times in the "body" property
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/0);
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
+
+ // Creates a dummy DocHitInfoIterator with 1 result for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ SectionIdMask section_id_mask1 = 0b00000001;
+
+ // Since the document hit has zero frequency, expect a score of zero.
+ ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1,
+ /*score=*/0.000000);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/1, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1)));
+}
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_SameHitFrequencyDifferentPropertyWeights) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store()->Put(document2, /*num_tokens=*/1));
+
+ // Document 1 contains the term "foo" 1 time in the "body" property
+ SectionId body_section_id = 0;
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
+
+ // Document 2 contains the term "foo" 1 time in the "subject" property
+ SectionId subject_section_id = 1;
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
+ doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
+ doc_hit_info2};
+
+ // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ PropertyWeight body_property_weight =
+ CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5);
+ PropertyWeight subject_property_weight =
+ CreatePropertyWeight(/*path=*/"subject", /*weight=*/2.0);
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"email", {body_property_weight, subject_property_weight});
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ SectionIdMask body_section_id_mask = 1U << body_section_id;
+ SectionIdMask subject_section_id_mask = 1U << subject_section_id;
+
+  // We expect document 2 to have a higher score than document 1 as it matches
+  // "foo" in the "subject" property, which is weighted higher than the "body"
+  // property. Final scores are computed with smoothing applied.
+ ScoredDocumentHit expected_scored_doc_hit1(document_id1, body_section_id_mask,
+ /*score=*/0.053624);
+ ScoredDocumentHit expected_scored_doc_hit2(document_id2,
+ subject_section_id_mask,
+ /*score=*/0.153094);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/2, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+ EqualsScoredDocumentHit(expected_scored_doc_hit2)));
+}
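+// Rough sketch of how the two expected scores above come about (illustrative
+// only; the exact BM25F math lives in the relevance scorer): each hit's term
+// frequency is scaled by its normalized property weight, with the max weight
+// (2.0) normalizing to 1.0:
+//
+//   document 1: tf 1 * (0.5 / 2.0) = 0.25  // "body" hit
+//   document 2: tf 1 * (2.0 / 2.0) = 1.00  // "subject" hit, ranks higher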
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_WithImplicitPropertyWeight) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store()->Put(document2, /*num_tokens=*/1));
+
+ // Document 1 contains the term "foo" 1 time in the "body" property
+ SectionId body_section_id = 0;
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
+
+ // Document 2 contains the term "foo" 1 time in the "subject" property
+ SectionId subject_section_id = 1;
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
+ doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
+ doc_hit_info2};
+
+ // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ PropertyWeight body_property_weight =
+ CreatePropertyWeight(/*path=*/"body", /*weight=*/0.5);
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"email", {body_property_weight});
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ SectionIdMask body_section_id_mask = 1U << body_section_id;
+ SectionIdMask subject_section_id_mask = 1U << subject_section_id;
+
+  // We expect document 2 to have a higher score than document 1 as it matches
+  // "foo" in the "subject" property, which is weighted higher than the "body"
+  // property. This is because the "subject" property is implicitly given a
+  // weight of 1.0, the default weight value. Final scores are computed with
+  // smoothing applied.
+ ScoredDocumentHit expected_scored_doc_hit1(document_id1, body_section_id_mask,
+ /*score=*/0.094601);
+ ScoredDocumentHit expected_scored_doc_hit2(document_id2,
+ subject_section_id_mask,
+ /*score=*/0.153094);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/2, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+ EqualsScoredDocumentHit(expected_scored_doc_hit2)));
+}
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_WithDefaultPropertyWeight) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/1));
+
+ // Document 1 contains the term "foo" 1 time in the "body" property
+ SectionId body_section_id = 0;
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1};
+
+ // Creates a dummy DocHitInfoIterator with 1 result for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ *spec_proto.add_type_property_weights() =
+ CreateTypePropertyWeights(/*schema_type=*/"email", {});
+
+ // Creates a ScoringProcessor with no explicit weights set.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ ScoringSpecProto spec_proto_with_weights =
+ CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ PropertyWeight body_property_weight = CreatePropertyWeight(/*path=*/"body",
+ /*weight=*/1.0);
+ *spec_proto_with_weights.add_type_property_weights() =
+ CreateTypePropertyWeights(/*schema_type=*/"email",
+ {body_property_weight});
+
+ // Creates a ScoringProcessor with default weight set for "body" property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor_with_weights,
+ ScoringProcessor::Create(spec_proto_with_weights, document_store(),
+ schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+  // Creates a separate set of query term iterators for the weighted scorer
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators_scoring_with_weights;
+ query_term_iterators_scoring_with_weights["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ SectionIdMask body_section_id_mask = 1U << body_section_id;
+
+  // We expect document 1 to have the same score whether a weight is explicitly
+  // set to 1.0 or implicitly scored with the default weight. Final scores are
+  // computed with smoothing applied.
+ ScoredDocumentHit expected_scored_doc_hit(document_id1, body_section_id_mask,
+ /*score=*/0.208191);
+ EXPECT_THAT(
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/1, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit)));
+
+  // Recreate the doc hit iterator and query term iterators, since the previous
+  // Score() call consumed them.
+ doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ EXPECT_THAT(scoring_processor_with_weights->Score(
+ std::move(doc_hit_info_iterator),
+ /*num_to_score=*/1, &query_term_iterators),
+ ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit)));
+}
+
+TEST_P(ScoringProcessorTest,
+ ShouldScoreByRelevanceScore_WithZeroPropertyWeight) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(document1, /*num_tokens=*/1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store()->Put(document2, /*num_tokens=*/1));
+
+ // Document 1 contains the term "foo" 1 time in the "body" property
+ SectionId body_section_id = 0;
+ DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id1);
+ doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1);
+
+ // Document 2 contains the term "foo" 1 time in the "subject" property
+ SectionId subject_section_id = 1;
+ DocHitInfoTermFrequencyPair doc_hit_info2 = DocHitInfo(document_id2);
+ doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1);
+
+ // Creates input doc_hit_infos and expected output scored_document_hits
+ std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1,
+ doc_hit_info2};
+
+ // Creates a dummy DocHitInfoIterator with 2 results for the query "foo"
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam());
+
+ // Sets property weight for "body" to 0.0.
+ PropertyWeight body_property_weight =
+ CreatePropertyWeight(/*path=*/"body", /*weight=*/0.0);
+ // Sets property weight for "subject" to 1.0.
+ PropertyWeight subject_property_weight =
+ CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0);
+ *spec_proto.add_type_property_weights() = CreateTypePropertyWeights(
+ /*schema_type=*/"email", {body_property_weight, subject_property_weight});
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+ query_term_iterators;
+ query_term_iterators["foo"] =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/2, &query_term_iterators);
+
+  // We expect document 1 to have a score of 0.0, as the query term "foo"
+  // matches in the "body" property, which has a weight of 0.0. This is a
+  // result of the weighted term frequency being scaled down to 0.0 for the
+  // hit. We expect document 2 to have a positive score, as the query term
+  // "foo" matches in the "subject" property, which has a weight of 1.0.
+ EXPECT_THAT(scored_document_hits, SizeIs(2));
+ EXPECT_THAT(scored_document_hits.at(0).document_id(), Eq(document_id1));
+ EXPECT_THAT(scored_document_hits.at(0).score(), Eq(0.0));
+ EXPECT_THAT(scored_document_hits.at(1).document_id(), Eq(document_id2));
+ EXPECT_THAT(scored_document_hits.at(1).score(), Gt(0.0));
+}
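+// The zero-weight case follows the same scaling sketch as above: a normalized
+// weight of 0.0 multiplies the hit's term frequency down to 0.0, which yields
+// a relevance score of exactly 0.0 for document 1.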
+
+TEST_P(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/1571100001111);
@@ -274,13 +923,14 @@ TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP, GetParam());
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -289,7 +939,129 @@ TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
EqualsScoredDocumentHit(scored_document_hit1)));
}
-TEST_F(ScoringProcessorTest, ShouldHandleNoScores) {
+TEST_P(ScoringProcessorTest, ShouldScoreByUsageCount) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store()->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store()->Put(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store()->Put(document3));
+
+ // Report usage for doc1 once and doc2 twice.
+ UsageReport usage_report_doc1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
+
+ DocHitInfo doc_hit_info1(document_id1);
+ DocHitInfo doc_hit_info2(document_id2);
+ DocHitInfo doc_hit_info3(document_id3);
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/2);
+ ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/0);
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+ doc_hit_info3};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam());
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
+ EqualsScoredDocumentHit(scored_document_hit2),
+ EqualsScoredDocumentHit(scored_document_hit3)));
+}
+
+TEST_P(ScoringProcessorTest, ShouldScoreByUsageTimestamp) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store()->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store()->Put(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store()->Put(document3));
+
+ // Report usage for doc1 and doc2.
+ UsageReport usage_report_doc1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
+
+ DocHitInfo doc_hit_info1(document_id1);
+ DocHitInfo doc_hit_info2(document_id2);
+ DocHitInfo doc_hit_info3(document_id3);
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/1000);
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/5000);
+ ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/0);
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+ doc_hit_info3};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ GetParam());
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
+
+ EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
+ EqualsScoredDocumentHit(scored_document_hit2),
+ EqualsScoredDocumentHit(scored_document_hit3)));
+}
+
+TEST_P(ScoringProcessorTest, ShouldHandleNoScores) {
// Creates input doc_hit_infos and corresponding scored_document_hits
ICING_ASSERT_OK_AND_ASSIGN(
auto doc_hit_result_pair,
@@ -310,13 +1082,14 @@ TEST_F(ScoringProcessorTest, ShouldHandleNoScores) {
ScoredDocumentHit scored_document_hit_default =
ScoredDocumentHit(4, kSectionIdMaskNone, /*score=*/0.0);
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam());
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/4),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit_default),
@@ -325,7 +1098,7 @@ TEST_F(ScoringProcessorTest, ShouldHandleNoScores) {
EqualsScoredDocumentHit(scored_document_hits.at(2))));
}
-TEST_F(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
+TEST_P(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
DocumentProto document1 = CreateDocument("icing", "email/1", /*score=*/1,
kDefaultCreationTimestampMs);
DocumentProto document2 = CreateDocument("icing", "email/2", /*score=*/2,
@@ -359,13 +1132,14 @@ TEST_F(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// A ScoringSpecProto with no scoring strategy
- ScoringSpecProto spec_proto;
- spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+ ScoringSpecProto spec_proto = CreateScoringSpecForRankingStrategy(
+ ScoringSpecProto::RankingStrategy::NONE, GetParam());
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
- ScoringProcessor::Create(spec_proto, document_store()));
+ ScoringProcessor::Create(spec_proto, document_store(), schema_store(),
+ fake_clock().GetSystemTimeMilliseconds()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
@@ -374,6 +1148,10 @@ TEST_F(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
EqualsScoredDocumentHit(scored_document_hit1)));
}
+INSTANTIATE_TEST_SUITE_P(ScoringProcessorTest, ScoringProcessorTest,
+ testing::Values(ScorerTestingMode::kNormal,
+ ScorerTestingMode::kAdvanced));
+
} // namespace
} // namespace lib
diff --git a/icing/scoring/section-weights.cc b/icing/scoring/section-weights.cc
new file mode 100644
index 0000000..ed7cd5e
--- /dev/null
+++ b/icing/scoring/section-weights.cc
@@ -0,0 +1,151 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/section-weights.h"
+
+#include <algorithm>
+#include <cfloat>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/schema/section.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Normalizes all weights in the map to be in range [0.0, 1.0], where the max
+// weight is normalized to 1.0. In the case that all weights are equal to 0.0,
+// the normalized weight for each will be 0.0.
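+//
+// For example (illustrative values): weights {0.1, 0.2, 0.4} with a max_weight
+// of 0.4 normalize to {0.25, 0.5, 1.0}, while an all-zero map is returned
+// unchanged.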
+inline void NormalizeSectionWeights(
+ double max_weight, std::unordered_map<SectionId, double>& section_weights) {
+ if (max_weight == 0.0) {
+ return;
+ }
+ for (auto& raw_weight : section_weights) {
+ raw_weight.second = raw_weight.second / max_weight;
+ }
+}
+} // namespace
+
+libtextclassifier3::StatusOr<std::unique_ptr<SectionWeights>>
+SectionWeights::Create(const SchemaStore* schema_store,
+ const ScoringSpecProto& scoring_spec) {
+ ICING_RETURN_ERROR_IF_NULL(schema_store);
+
+ std::unordered_map<SchemaTypeId, NormalizedSectionWeights>
+ schema_property_weight_map;
+ for (const TypePropertyWeights& type_property_weights :
+ scoring_spec.type_property_weights()) {
+ std::string_view schema_type = type_property_weights.schema_type();
+ auto schema_type_id_or = schema_store->GetSchemaTypeId(schema_type);
+ if (!schema_type_id_or.ok()) {
+ ICING_LOG(WARNING) << "No schema type id found for schema type: "
+ << schema_type;
+ continue;
+ }
+ SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
+ auto section_metadata_list_or =
+ schema_store->GetSectionMetadata(schema_type.data());
+ if (!section_metadata_list_or.ok()) {
+ ICING_LOG(WARNING) << "No metadata found for schema type: "
+ << schema_type;
+ continue;
+ }
+
+ const std::vector<SectionMetadata>* metadata_list =
+ section_metadata_list_or.ValueOrDie();
+
+ std::unordered_map<std::string, double> property_paths_weights;
+ for (const PropertyWeight& property_weight :
+ type_property_weights.property_weights()) {
+ double property_path_weight = property_weight.weight();
+
+ // Return error on negative weights.
+ if (property_path_weight < 0.0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Property weight for property path \"%s\" is negative. Negative "
+ "weights are invalid.",
+ property_weight.path().c_str()));
+ }
+ property_paths_weights.insert(
+ {property_weight.path(), property_path_weight});
+ }
+ NormalizedSectionWeights normalized_section_weights =
+ ExtractNormalizedSectionWeights(property_paths_weights, *metadata_list);
+
+ schema_property_weight_map.insert(
+ {schema_type_id,
+       {/*section_weights=*/std::move(
+            normalized_section_weights.section_weights),
+        /*default_weight=*/normalized_section_weights.default_weight}});
+ }
+ // Using `new` to access a non-public constructor.
+ return std::unique_ptr<SectionWeights>(
+ new SectionWeights(std::move(schema_property_weight_map)));
+}
+
+double SectionWeights::GetNormalizedSectionWeight(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ auto schema_type_map = schema_section_weight_map_.find(schema_type_id);
+ if (schema_type_map == schema_section_weight_map_.end()) {
+ // Return default weight if the schema type has no weights specified.
+ return kDefaultSectionWeight;
+ }
+
+ auto section_weight =
+ schema_type_map->second.section_weights.find(section_id);
+ if (section_weight == schema_type_map->second.section_weights.end()) {
+ // If there is no entry for SectionId, the weight is implicitly the
+ // normalized default weight.
+ return schema_type_map->second.default_weight;
+ }
+ return section_weight->second;
+}
+
+inline SectionWeights::NormalizedSectionWeights
+SectionWeights::ExtractNormalizedSectionWeights(
+ const std::unordered_map<std::string, double>& raw_weights,
+ const std::vector<SectionMetadata>& metadata_list) {
+ double max_weight = -std::numeric_limits<double>::infinity();
+ std::unordered_map<SectionId, double> section_weights;
+ for (const SectionMetadata& section_metadata : metadata_list) {
+ std::string_view metadata_path = section_metadata.path;
+ double section_weight = kDefaultSectionWeight;
+ auto iter = raw_weights.find(metadata_path.data());
+ if (iter != raw_weights.end()) {
+ section_weight = iter->second;
+ section_weights.insert({section_metadata.id, section_weight});
+ }
+ // Replace max if we see new max weight.
+ max_weight = std::max(max_weight, section_weight);
+ }
+
+ NormalizeSectionWeights(max_weight, section_weights);
+  // Set the normalized default weight to 1.0 in the degenerate case that
+  // there is no section metadata, i.e. max_weight is still -INF (this should
+  // not happen in practice).
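+  // (Illustrative: with raw weights {"subject": 10.0} the max is 10.0, so
+  // unlisted sections get a normalized default of 1.0 / 10.0 = 0.1.)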
+ double normalized_default_weight =
+ max_weight == -std::numeric_limits<double>::infinity()
+ ? kDefaultSectionWeight
+ : kDefaultSectionWeight / max_weight;
+ SectionWeights::NormalizedSectionWeights normalized_section_weights =
+ SectionWeights::NormalizedSectionWeights();
+ normalized_section_weights.section_weights = std::move(section_weights);
+ normalized_section_weights.default_weight = normalized_default_weight;
+ return normalized_section_weights;
+}
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/section-weights.h b/icing/scoring/section-weights.h
new file mode 100644
index 0000000..ab69af2
--- /dev/null
+++ b/icing/scoring/section-weights.h
@@ -0,0 +1,96 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SECTION_WEIGHTS_H_
+#define ICING_SCORING_SECTION_WEIGHTS_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+inline constexpr double kDefaultSectionWeight = 1.0;
+
+// Provides functions for setting and retrieving section weights for schema
+// type properties. Section weights are used to promote and demote term matches
+// in sections when scoring results. Section weights are provided by property
+// path and must fall within the range [0.0, DBL_MAX]. The SectionId is matched
+// to the property path by iterating over the schema type's section metadata.
+// Weights that correspond to a valid property path are normalized against the
+// maximum section weight and put into a map for quick access by scorers. By
+// default, a section is given a raw, pre-normalized weight of 1.0.
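+//
+// Example usage (a sketch; assumes a valid SchemaStore* `schema_store` and a
+// populated ScoringSpecProto `scoring_spec`):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<SectionWeights> section_weights,
+//       SectionWeights::Create(schema_store, scoring_spec));
+//   double weight = section_weights->GetNormalizedSectionWeight(
+//       schema_type_id, section_id);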
+class SectionWeights {
+ public:
+ // SectionWeights instances should not be copied.
+ SectionWeights(const SectionWeights&) = delete;
+ SectionWeights& operator=(const SectionWeights&) = delete;
+
+  // Factory function to create a SectionWeights instance. Raw weights are
+  // provided through the ScoringSpecProto. Provided property paths for weights
+  // are validated against the schema type's section metadata. If the property
+  // path doesn't exist, the property weight is ignored. If a weight is
+  // negative, an invalid argument error is returned. Raw weights are then
+  // normalized against the maximum weight for that schema type.
+  //
+  // Returns:
+  //   A SectionWeights instance on success
+  //   FAILED_PRECONDITION on any null pointer input
+  //   INVALID_ARGUMENT if a provided weight for a property path is negative
+ static libtextclassifier3::StatusOr<std::unique_ptr<SectionWeights>> Create(
+ const SchemaStore* schema_store, const ScoringSpecProto& scoring_spec);
+
+ // Returns the normalized section weight by SchemaTypeId and SectionId. If
+ // the SchemaTypeId, or the SectionId for a SchemaTypeId, is not found in the
+ // normalized weights map, the default weight is returned instead.
+ double GetNormalizedSectionWeight(SchemaTypeId schema_type_id,
+ SectionId section_id) const;
+
+ private:
+ // Holds the normalized section weights for a schema type, as well as the
+ // normalized default weight for sections that have no weight set.
+ struct NormalizedSectionWeights {
+ std::unordered_map<SectionId, double> section_weights;
+ double default_weight;
+ };
+
+ explicit SectionWeights(
+ const std::unordered_map<SchemaTypeId, NormalizedSectionWeights>
+ schema_section_weight_map)
+ : schema_section_weight_map_(std::move(schema_section_weight_map)) {}
+
+ // Creates a map of section ids to normalized weights from the raw property
+ // path weight map and section metadata and calculates the normalized default
+ // section weight.
+ static inline SectionWeights::NormalizedSectionWeights
+ ExtractNormalizedSectionWeights(
+ const std::unordered_map<std::string, double>& raw_weights,
+ const std::vector<SectionMetadata>& metadata_list);
+
+ // A map of (SchemaTypeId -> SectionId -> Normalized Weight), allows for fast
+ // look up of normalized weights. This is precomputed when creating a
+ // SectionWeights instance.
+ std::unordered_map<SchemaTypeId, NormalizedSectionWeights>
+ schema_section_weight_map_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SECTION_WEIGHTS_H_
diff --git a/icing/scoring/section-weights_test.cc b/icing/scoring/section-weights_test.cc
new file mode 100644
index 0000000..28b1797
--- /dev/null
+++ b/icing/scoring/section-weights_test.cc
@@ -0,0 +1,447 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/section-weights.h"
+
+#include <cfloat>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::Eq;
+
+class SectionWeightsTest : public testing::Test {
+ protected:
+ SectionWeightsTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ schema_store_dir_(test_dir_ + "/schema_store") {}
+
+ void SetUp() override {
+ // Creates file directories
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        schema_store_,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaTypeConfigProto sender_schema =
+ SchemaTypeConfigBuilder()
+ .SetType("sender")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto email_schema =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(
+ TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument("sender",
+ /*index_nested_properties=*/true)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder().AddType(sender_schema).AddType(email_schema).Build();
+
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ void TearDown() override {
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ SchemaStore *schema_store() { return schema_store_.get(); }
+
+ private:
+ const std::string test_dir_;
+ const std::string schema_store_dir_;
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
+TEST_F(SectionWeightsTest, ShouldNormalizeSinglePropertyWeight) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("sender");
+
+ PropertyWeight *property_weight =
+ type_property_weights->add_property_weights();
+ property_weight->set_weight(5.0);
+ property_weight->set_path("name");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId sender_schema_type_id,
+ schema_store()->GetSchemaTypeId("sender"));
+
+ // section_id 0 corresponds to property "name".
+ // We expect 1.0 as there is only one property in the "sender" schema type
+ // so it should take the max normalized weight of 1.0.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(sender_schema_type_id,
+ /*section_id=*/0),
+ Eq(1.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldAcceptMaxWeightValue) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("sender");
+
+ PropertyWeight *property_weight =
+ type_property_weights->add_property_weights();
+ property_weight->set_weight(DBL_MAX);
+ property_weight->set_path("name");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId sender_schema_type_id,
+ schema_store()->GetSchemaTypeId("sender"));
+
+ // section_id 0 corresponds to property "name".
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(sender_schema_type_id,
+ /*section_id=*/0),
+ Eq(1.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldFailWithNegativeWeights) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("email");
+
+  PropertyWeight *body_property_weight =
+      type_property_weights->add_property_weights();
+  body_property_weight->set_weight(-100.0);
+  body_property_weight->set_path("body");
+
+ EXPECT_THAT(SectionWeights::Create(schema_store(), spec_proto).status(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionWeightsTest, ShouldAcceptZeroWeight) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("email");
+
+ PropertyWeight *body_property_weight =
+ type_property_weights->add_property_weights();
+ body_property_weight->set_weight(2.0);
+ body_property_weight->set_path("body");
+
+ PropertyWeight *subject_property_weight =
+ type_property_weights->add_property_weights();
+ subject_property_weight->set_weight(0.0);
+ subject_property_weight->set_path("subject");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+
+ // Normalized weight for "body" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/0),
+ Eq(1.0));
+ // Normalized weight for "subject" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/2),
+ Eq(0.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldNormalizeToZeroWhenAllWeightsZero) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("email");
+
+ PropertyWeight *body_property_weight =
+ type_property_weights->add_property_weights();
+ body_property_weight->set_weight(0.0);
+ body_property_weight->set_path("body");
+
+ PropertyWeight *sender_property_weight =
+ type_property_weights->add_property_weights();
+ sender_property_weight->set_weight(0.0);
+ sender_property_weight->set_path("sender.name");
+
+ PropertyWeight *subject_property_weight =
+ type_property_weights->add_property_weights();
+ subject_property_weight->set_weight(0.0);
+ subject_property_weight->set_path("subject");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+
+ // Normalized weight for "body" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/0),
+ Eq(0.0));
+ // Normalized weight for "sender.name" property (the nested property).
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/1),
+ Eq(0.0));
+ // Normalized weight for "subject" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/2),
+ Eq(0.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldReturnDefaultIfTypePropertyWeightsNotSet) {
+ ScoringSpecProto spec_proto;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/0),
+ Eq(kDefaultSectionWeight));
+}
+
+TEST_F(SectionWeightsTest, ShouldSetNestedPropertyWeights) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("email");
+
+ PropertyWeight *body_property_weight =
+ type_property_weights->add_property_weights();
+ body_property_weight->set_weight(1.0);
+ body_property_weight->set_path("body");
+
+ PropertyWeight *subject_property_weight =
+ type_property_weights->add_property_weights();
+ subject_property_weight->set_weight(100.0);
+ subject_property_weight->set_path("subject");
+
+ PropertyWeight *nested_property_weight =
+ type_property_weights->add_property_weights();
+ nested_property_weight->set_weight(50.0);
+ nested_property_weight->set_path("sender.name");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+
+ // Normalized weight for "body" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/0),
+ Eq(0.01));
+ // Normalized weight for "sender.name" property (the nested property).
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/1),
+ Eq(0.5));
+ // Normalized weight for "subject" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/2),
+ Eq(1.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldNormalizeIfAllWeightsBelowOne) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("email");
+
+ PropertyWeight *body_property_weight =
+ type_property_weights->add_property_weights();
+ body_property_weight->set_weight(0.1);
+ body_property_weight->set_path("body");
+
+ PropertyWeight *sender_name_weight =
+ type_property_weights->add_property_weights();
+ sender_name_weight->set_weight(0.2);
+ sender_name_weight->set_path("sender.name");
+
+ PropertyWeight *subject_property_weight =
+ type_property_weights->add_property_weights();
+ subject_property_weight->set_weight(0.4);
+ subject_property_weight->set_path("subject");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+
+ // Normalized weight for "body" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/0),
+ Eq(1.0 / 4.0));
+ // Normalized weight for "sender.name" property (the nested property).
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/1),
+ Eq(2.0 / 4.0));
+ // Normalized weight for "subject" property.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/2),
+ Eq(1.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldSetNestedPropertyWeightSeparatelyForTypes) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *email_type_property_weights =
+ spec_proto.add_type_property_weights();
+ email_type_property_weights->set_schema_type("email");
+
+ PropertyWeight *body_property_weight =
+ email_type_property_weights->add_property_weights();
+ body_property_weight->set_weight(1.0);
+ body_property_weight->set_path("body");
+
+ PropertyWeight *subject_property_weight =
+ email_type_property_weights->add_property_weights();
+ subject_property_weight->set_weight(100.0);
+ subject_property_weight->set_path("subject");
+
+ PropertyWeight *sender_name_property_weight =
+ email_type_property_weights->add_property_weights();
+ sender_name_property_weight->set_weight(50.0);
+ sender_name_property_weight->set_path("sender.name");
+
+ TypePropertyWeights *sender_type_property_weights =
+ spec_proto.add_type_property_weights();
+ sender_type_property_weights->set_schema_type("sender");
+
+ PropertyWeight *sender_property_weight =
+ sender_type_property_weights->add_property_weights();
+ sender_property_weight->set_weight(25.0);
+ sender_property_weight->set_path("sender");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId sender_schema_type_id,
+ schema_store()->GetSchemaTypeId("sender"));
+
+ // Normalized weight for "sender.name" property (the nested property)
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/1),
+ Eq(0.5));
+ // Normalized weight for "name" property for "sender" schema type. As it is
+ // the only property of the type, it should take the max normalized weight of
+ // 1.0.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(sender_schema_type_id,
+ /*section_id=*/2),
+ Eq(1.0));
+}
+
+TEST_F(SectionWeightsTest, ShouldSkipNonExistentPathWhenSettingWeights) {
+ ScoringSpecProto spec_proto;
+
+ TypePropertyWeights *type_property_weights =
+ spec_proto.add_type_property_weights();
+ type_property_weights->set_schema_type("email");
+
+ // If this property weight isn't skipped, then the max property weight would
+ // be set to 100.0 and all weights would be normalized against the max.
+ PropertyWeight *non_valid_property_weight =
+ type_property_weights->add_property_weights();
+ non_valid_property_weight->set_weight(100.0);
+ non_valid_property_weight->set_path("sender.organization");
+
+ PropertyWeight *subject_property_weight =
+ type_property_weights->add_property_weights();
+ subject_property_weight->set_weight(10.0);
+ subject_property_weight->set_path("subject");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SectionWeights> section_weights,
+ SectionWeights::Create(schema_store(), spec_proto));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+ schema_store()->GetSchemaTypeId("email"));
+
+ // Normalized weight for "body" property. Because the weight is not explicitly
+ // set, it is set to the default of 1.0 before being normalized.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/0),
+ Eq(0.1));
+ // Normalized weight for "sender.name" property (the nested property). Because
+ // the weight is not explicitly set, it is set to the default of 1.0 before
+ // being normalized.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/1),
+ Eq(0.1));
+ // Normalized weight for "subject" property. Because the invalid property path
+ // is skipped when assigning weights, subject takes the max normalized weight
+ // of 1.0 instead.
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id,
+ /*section_id=*/2),
+ Eq(1.0));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/corpus-associated-scoring-data.h b/icing/store/corpus-associated-scoring-data.h
new file mode 100644
index 0000000..52be5cd
--- /dev/null
+++ b/icing/store/corpus-associated-scoring-data.h
@@ -0,0 +1,79 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_TYPE_NAMESPACE_ASSOCIATED_SCORING_DATA_H_
+#define ICING_STORE_TYPE_NAMESPACE_ASSOCIATED_SCORING_DATA_H_
+
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+
+namespace icing {
+namespace lib {
+
+// This is the cache entity of corpus-associated scores. The ground-truth data
+// is stored somewhere else. The cache includes:
+// 1. Number of documents contained in the corpus.
+// Positive values are required.
+// 2. The sum of the documents' lengths, in number of tokens.
+class CorpusAssociatedScoreData {
+ public:
+ explicit CorpusAssociatedScoreData(int num_docs = 0,
+ int64_t sum_length_in_tokens = 0)
+ : sum_length_in_tokens_(sum_length_in_tokens), num_docs_(num_docs) {}
+
+ bool operator==(const CorpusAssociatedScoreData& other) const {
+ return num_docs_ == other.num_docs() &&
+ sum_length_in_tokens_ == other.sum_length_in_tokens();
+ }
+
+ uint32_t num_docs() const { return num_docs_; }
+ void set_num_docs(uint32_t val) { num_docs_ = val; }
+
+ uint64_t sum_length_in_tokens() const { return sum_length_in_tokens_; }
+ void set_sum_length_in_tokens(uint64_t val) { sum_length_in_tokens_ = val; }
+
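+  // Note: the denominator is num_docs_ + 1, which avoids division by zero for
+  // an empty corpus and slightly damps the average for small corpora.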
+ float average_doc_length_in_tokens() const {
+ return sum_length_in_tokens_ / (1.0f + num_docs_);
+ }
+
+ // Adds a new document.
+ // Adds the document's length to the total length of the corpus,
+ // sum_length_in_tokens_.
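+  // The sum saturates at INT_MAX rather than overflowing; e.g. (illustrative)
+  // adding a 10-token document when the sum is INT_MAX - 5 leaves the sum
+  // clamped at INT_MAX.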
+ void AddDocument(uint32_t doc_length_in_tokens) {
+ ++num_docs_;
+ sum_length_in_tokens_ =
+ (std::numeric_limits<int>::max() - doc_length_in_tokens <
+ sum_length_in_tokens_)
+ ? std::numeric_limits<int>::max()
+ : sum_length_in_tokens_ + doc_length_in_tokens;
+ }
+
+ private:
+ // The sum total of the length of all documents in the corpus.
+ int sum_length_in_tokens_;
+ int num_docs_;
+} __attribute__((packed));
+
+static_assert(sizeof(CorpusAssociatedScoreData) == 8,
+ "Size of CorpusAssociatedScoreData should be 8");
+static_assert(icing_is_packed_pod<CorpusAssociatedScoreData>::value,
+ "go/icing-ubsan");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_TYPE_NAMESPACE_ASSOCIATED_SCORING_DATA_H_
diff --git a/icing/store/corpus-id.h b/icing/store/corpus-id.h
new file mode 100644
index 0000000..01135b9
--- /dev/null
+++ b/icing/store/corpus-id.h
@@ -0,0 +1,32 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_CORPUS_ID_H_
+#define ICING_STORE_CORPUS_ID_H_
+
+#include <cstdint>
+
+namespace icing {
+namespace lib {
+
+// Identifier for a corpus, i.e. a <namespace, schema_type> pair, in
+// DocumentProto. Generated in DocumentStore.
+using CorpusId = int32_t;
+
+inline constexpr CorpusId kInvalidCorpusId = -1;
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_CORPUS_ID_H_
diff --git a/icing/store/document-associated-score-data.h b/icing/store/document-associated-score-data.h
index b9039c5..9a711c8 100644
--- a/icing/store/document-associated-score-data.h
+++ b/icing/store/document-associated-score-data.h
@@ -19,6 +19,7 @@
#include <type_traits>
#include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/store/corpus-id.h"
namespace icing {
namespace lib {
@@ -26,33 +27,46 @@ namespace lib {
// This is the cache entity of document-associated scores. It contains scores
// that are related to the document itself. The ground-truth data is stored
// somewhere else. The cache includes:
-// 1. Document score. It's defined in and passed from DocumentProto.score.
+// 1. Corpus Id.
+// 2. Document score. It's defined in and passed from DocumentProto.score.
// Positive values are required.
-// 2. Document creation timestamp. Unix timestamp of when the document is
+// 3. Document creation timestamp. Unix timestamp of when the document is
// created and inserted into Icing.
+// 4. Document length in number of tokens.
class DocumentAssociatedScoreData {
public:
- explicit DocumentAssociatedScoreData(int document_score,
- int64_t creation_timestamp_ms)
- : document_score_(document_score),
- creation_timestamp_ms_(creation_timestamp_ms) {}
+ explicit DocumentAssociatedScoreData(CorpusId corpus_id, int document_score,
+ int64_t creation_timestamp_ms,
+ int length_in_tokens = 0)
+ : creation_timestamp_ms_(creation_timestamp_ms),
+ corpus_id_(corpus_id),
+ document_score_(document_score),
+ length_in_tokens_(length_in_tokens) {}
bool operator==(const DocumentAssociatedScoreData& other) const {
return document_score_ == other.document_score() &&
- creation_timestamp_ms_ == other.creation_timestamp_ms();
+ creation_timestamp_ms_ == other.creation_timestamp_ms() &&
+ length_in_tokens_ == other.length_in_tokens() &&
+ corpus_id_ == other.corpus_id();
}
+ CorpusId corpus_id() const { return corpus_id_; }
+
int document_score() const { return document_score_; }
int64_t creation_timestamp_ms() const { return creation_timestamp_ms_; }
+ int length_in_tokens() const { return length_in_tokens_; }
+
private:
- int document_score_;
int64_t creation_timestamp_ms_;
+ CorpusId corpus_id_;
+ int document_score_;
+ int length_in_tokens_;
} __attribute__((packed));
-static_assert(sizeof(DocumentAssociatedScoreData) == 12,
- "Size of DocumentAssociatedScoreData should be 12");
+static_assert(sizeof(DocumentAssociatedScoreData) == 20,
+ "Size of DocumentAssociatedScoreData should be 20");
static_assert(icing_is_packed_pod<DocumentAssociatedScoreData>::value,
"go/icing-ubsan");
diff --git a/icing/store/document-filter-data.h b/icing/store/document-filter-data.h
index 198bc49..3970132 100644
--- a/icing/store/document-filter-data.h
+++ b/icing/store/document-filter-data.h
@@ -25,6 +25,7 @@ namespace icing {
namespace lib {
using SchemaTypeId = int16_t;
+inline constexpr SchemaTypeId kInvalidSchemaTypeId = -1;
class DocumentFilterData {
public:
diff --git a/icing/store/document-id.h b/icing/store/document-id.h
index cbe9959..7ea33b8 100644
--- a/icing/store/document-id.h
+++ b/icing/store/document-id.h
@@ -23,10 +23,11 @@ namespace lib {
// Id of a document
using DocumentId = int32_t;
-// We use 20 bits to encode document_ids and use the largest value (1M - 1) to
+// We use 22 bits to encode document_ids and use the largest value (2^22 - 1) to
// represent an invalid document_id.
-inline constexpr int kDocumentIdBits = 20;
-inline constexpr DocumentId kInvalidDocumentId = (1u << kDocumentIdBits) - 1;
+inline constexpr int kDocumentIdBits = 22;
+inline constexpr DocumentId kInvalidDocumentId =
+ (INT32_C(1) << kDocumentIdBits) - 1;
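+// (Illustrative: with 22 bits, kInvalidDocumentId == 4194303 and valid ids
+// span [0, 4194302].)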
inline constexpr DocumentId kMinDocumentId = 0;
inline constexpr DocumentId kMaxDocumentId = kInvalidDocumentId - 1;
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
new file mode 100644
index 0000000..2abd315
--- /dev/null
+++ b/icing/store/document-log-creator.cc
@@ -0,0 +1,205 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-log-creator.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Base filename of the document log; version suffixes are appended below.
+constexpr char kDocumentLogFilename[] = "document_log";
+
+std::string DocumentLogFilenameV0() {
+ // Originally only had this one version, no suffix.
+ return kDocumentLogFilename;
+}
+
+std::string DocumentLogFilenameV1() {
+ return absl_ports::StrCat(kDocumentLogFilename, "_v1");
+}
+
+std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
+}
+
+std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
+}
+
+} // namespace
+
+std::string DocumentLogCreator::GetDocumentLogFilename() {
+ // This should always return the latest version of the document log in use.
+ // The current latest version is V1.
+ return DocumentLogFilenameV1();
+}
+
+libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
+DocumentLogCreator::Create(const Filesystem* filesystem,
+ const std::string& base_dir,
+ int32_t compression_level) {
+ bool v0_exists =
+ filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
+ bool v1_exists =
+ filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
+
+ bool new_file = false;
+ int preexisting_file_version = kCurrentVersion;
+ if (v0_exists && !v1_exists) {
+ ICING_RETURN_IF_ERROR(
+ MigrateFromV0ToV1(filesystem, base_dir, compression_level));
+
+ // Need to regenerate derived files since documents may be written to a
+ // different file offset in the log.
+ preexisting_file_version = 0;
+ } else if (!v1_exists) {
+ // First time initializing a v1 log. There are no existing derived files
+ // at this point, so "regenerate" here simply means "generate for the
+ // first time".
+ new_file = true;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
+ log_create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV1(base_dir),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true,
+ PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize,
+ compression_level)));
+
+ CreateResult create_result = {std::move(log_create_result),
+ preexisting_file_version, new_file};
+ return create_result;
+}
+
+libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level) {
+ ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
+
+ // Our v0 proto log was non-portable, create it so we can read protos out from
+ // it.
+ auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV0(base_dir),
+ FileBackedProtoLog<DocumentWrapper>::Options(/*compress_in=*/true));
+ if (!v0_create_result_or.ok()) {
+ return absl_ports::Annotate(
+ v0_create_result_or.status(),
+ "Failed to initialize v0 document log while migrating.");
+ }
+ FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
+ std::move(v0_create_result_or).ValueOrDie();
+ std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
+ std::move(v0_create_result.proto_log);
+
+ // Create a v1 portable proto log that we will write our protos to.
+ auto v1_create_result_or =
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV1(base_dir),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true,
+ /*max_proto_size_in=*/
+ PortableFileBackedProtoLog<DocumentWrapper>::kMaxProtoSize,
+ /*compression_level_in=*/compression_level));
+ if (!v1_create_result_or.ok()) {
+ return absl_ports::Annotate(
+ v1_create_result_or.status(),
+ "Failed to initialize v1 document log while migrating.");
+ }
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
+ std::move(v1_create_result_or).ValueOrDie();
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
+ std::move(v1_create_result.proto_log);
+
+ // Dummy empty document to be used when copying over deleted documents.
+ DocumentProto empty_document;
+
+ // Start reading protos out of the old log and writing them into the new log.
+ ICING_ASSIGN_OR_RETURN(FileBackedProtoLog<DocumentWrapper>::Iterator iterator,
+ v0_proto_log->GetIterator());
+ auto iterator_status = iterator.Advance();
+ while (iterator_status.ok()) {
+ libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
+ v0_proto_log->ReadProto(iterator.GetOffset());
+
+ bool deleted_document = false;
+ DocumentWrapper document_wrapper;
+ if (absl_ports::IsNotFound(document_wrapper_or.status())) {
+ // Proto was erased. Write a placeholder document so document ids stay
+ // aligned; it is erased again from the new log below.
+ *document_wrapper.mutable_document() = empty_document;
+ deleted_document = true;
+ } else if (!document_wrapper_or.ok()) {
+ // Some real error, pass up
+ return document_wrapper_or.status();
+ } else {
+ document_wrapper = std::move(document_wrapper_or).ValueOrDie();
+ }
+
+ auto offset_or = v1_proto_log->WriteProto(document_wrapper);
+ if (!offset_or.ok()) {
+ return absl_ports::Annotate(
+ offset_or.status(),
+ "Failed to write proto to v1 document log while migrating.");
+ }
+
+ // If the original document was deleted, erase the proto we just wrote.
+ // We do this to maintain the document_ids, i.e. we still want document_id 2
+ // to point to a deleted document even though we may not have the document
+ // contents anymore. DocumentStore guarantees that the document_ids don't
+ // change unless an Optimize is triggered.
+ if (deleted_document) {
+ int64_t offset = offset_or.ValueOrDie();
+ auto erased_status = v1_proto_log->EraseProto(offset);
+ if (!erased_status.ok()) {
+ return absl_ports::Annotate(
+ erased_status,
+ "Failed to erase proto in v1 document log while migrating.");
+ }
+ }
+
+ iterator_status = iterator.Advance();
+ }
+
+ // Close out our file log pointers.
+ v0_proto_log.reset();
+ v1_proto_log.reset();
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
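The write-then-erase dance for deleted documents keeps ordinal positions
stable even though byte offsets change, e.g. (a toy illustration, not real
log contents):

  // v0 log: [doc0][erased][doc2]        -> document ids 0, 1, 2
  // v1 log: [doc0][placeholder][doc2]   -> document ids 0, 1, 2
  //                ^ written, then immediately erased via EraseProto()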
diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h
new file mode 100644
index 0000000..0c2794a
--- /dev/null
+++ b/icing/store/document-log-creator.h
@@ -0,0 +1,85 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+#define ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Handles creation of the document log and any underlying migrations that may
+// be necessary.
+class DocumentLogCreator {
+ public:
+ // Version 0 refers to FileBackedProtoLog.
+ // Version 1 refers to PortableFileBackedProtoLog with kFileFormatVersion = 0.
+ static constexpr int32_t kCurrentVersion = 1;
+ struct CreateResult {
+ // The create result passed up from the PortableFileBackedProtoLog::Create.
+ // Contains the document log.
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result;
+
+ // The version number of the pre-existing document log file.
+ // If there is no document log file, it will be set to kCurrentVersion.
+ int preexisting_file_version;
+
+ // Whether the created file is new.
+ bool new_file;
+ };
+
+ // Creates the document log in the base_dir. Will create one if it doesn't
+ // already exist.
+ //
+ // This also handles any potential migrations from old document log versions.
+ // At the end of this call, the most up-to-date log will be returned and will
+ // be usable.
+ //
+ // Returns:
+ // CreateResult on success.
+ // INTERNAL on any I/O error.
+ static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create(
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level);
+
+ // Returns the filename of the document log, without any directory prefixes.
+ // Used mainly for testing purposes.
+ static std::string GetDocumentLogFilename();
+
+ private:
+ // Handles migrating a v0 document log (not portable) to a v1 document log
+ // (portable). This will initialize the log in the beginning, and close it
+ // when migration is done. Callers will need to reinitialize the log on their
+ // own.
+ //
+ // Returns:
+ // OK on success.
+ // INVALID_ARGUMENT if some invalid option was passed to the document log.
+ // INTERNAL on I/O error.
+ static libtextclassifier3::Status MigrateFromV0ToV1(
+ const Filesystem* filesystem, const std::string& base_dir,
+ int32_t compression_level);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_LOG_CREATOR_H_
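A minimal caller sketch for this API (error handling elided; `fs`, `dir`, and
the compression level 3 are hypothetical placeholders):

  auto result_or = DocumentLogCreator::Create(&fs, dir,
                                              /*compression_level=*/3);
  if (result_or.ok()) {
    DocumentLogCreator::CreateResult result =
        std::move(result_or).ValueOrDie();
    // Mirrors the conditions DocumentStore::Initialize checks before
    // rebuilding derived files.
    bool needs_regen =
        result.new_file ||
        result.preexisting_file_version !=
            DocumentLogCreator::kCurrentVersion ||
        result.log_create_result.has_data_loss();
  }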
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 93cebaa..094eea1 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -17,8 +17,10 @@
#include <cstdint>
#include <limits>
#include <memory>
+#include <optional>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -32,19 +34,38 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/usage.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-associated-scoring-data.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
-#include "icing/store/key-mapper.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/store/usage-store.h"
+#include "icing/tokenization/language-segmenter.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/fingerprint-util.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
@@ -53,18 +74,32 @@ namespace {
// Used in DocumentId mapper to mark a document as deleted
constexpr int64_t kDocDeletedFlag = -1;
-constexpr char kDocumentLogFilename[] = "document_log";
constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
+constexpr char kUriHashMapperWorkingPath[] = "uri_mapper";
constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
constexpr char kScoreCacheFilename[] = "score_cache";
+constexpr char kCorpusScoreCache[] = "corpus_score_cache";
constexpr char kFilterCacheFilename[] = "filter_cache";
constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
-
-constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
-
-// 384 KiB for a KeyMapper would allow each internal array to have a max of
-// 128 KiB for storage.
+constexpr char kUsageStoreDirectoryName[] = "usage_store";
+constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
+
+// Determined through manual testing to allow for 4 million URIs, since we
+// allow up to 4 million DocumentIds.
+constexpr int32_t kUriDynamicTrieKeyMapperMaxSize =
+ 144 * 1024 * 1024; // 144 MiB
+
+constexpr int32_t kUriHashKeyMapperMaxNumEntries =
+ kMaxDocumentId + 1; // (1 << 22) - 1, ~4M
+// - Key: namespace_id_str (3 bytes) + fingerprinted_uri (10 bytes) + '\0' (1
+// byte)
+// - Value: DocumentId (4 bytes)
+constexpr int32_t kUriHashKeyMapperKVByteSize = 13 + 1 + sizeof(DocumentId);
+
+// 384 KiB for a DynamicTrieKeyMapper would allow each internal array to have a
+// max of 128 KiB for storage.
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
+constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB
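Back-of-envelope for the sizing constants above (a sketch; the per-entry
bytes come straight from the comments, not from measurement):

  // ~4M entries * 18 B/entry (13 key bytes + 1 '\0' + 4 value bytes)
  //   = ~72 MiB of worst-case key/value payload for the hash mapper,
  //   half the 144 MiB budget reserved for the dynamic-trie variant.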
DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
DocumentWrapper document_wrapper;
@@ -72,49 +107,26 @@ DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
return document_wrapper;
}
-DocumentWrapper CreateDocumentTombstone(std::string_view document_namespace,
- std::string_view document_uri) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_namespace_(std::string(document_namespace));
- document->set_uri(std::string(document_uri));
- return document_wrapper;
-}
-
-DocumentWrapper CreateNamespaceTombstone(std::string_view document_namespace) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_namespace_(std::string(document_namespace));
- return document_wrapper;
-}
-
-DocumentWrapper CreateSchemaTypeTombstone(
- std::string_view document_schema_type) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_schema(std::string(document_schema_type));
- return document_wrapper;
-}
-
std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
}
-std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
+std::string MakeUriHashMapperWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kUriHashMapperWorkingPath);
}
-std::string MakeDocumentLogFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename);
+std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
}
std::string MakeScoreCacheFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
}
+std::string MakeCorpusScoreCache(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kCorpusScoreCache);
+}
+
std::string MakeFilterCacheFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kFilterCacheFilename);
}
@@ -123,27 +135,12 @@ std::string MakeNamespaceMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kNamespaceMapperFilename);
}
-// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
-// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
-// overhead per key. As we know that these fingerprints are always 8-bytes in
-// length and that they're random, we might be able to store them more
-// compactly.
-std::string MakeFingerprint(std::string_view name_space, std::string_view uri) {
- // Using a 64-bit fingerprint to represent the key could lead to collisions.
- // But, even with 200K unique keys, the probability of collision is about
- // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
- uint64_t fprint =
- tc3farmhash::Fingerprint64(absl_ports::StrCat(name_space, uri));
+std::string MakeUsageStoreDirectoryName(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName);
+}
- std::string encoded_fprint;
- // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in
- // base128 and add 1 to make sure that no byte is '0'. This increases the
- // size of the encoded_fprint from 8-bytes to 10-bytes.
- while (fprint) {
- encoded_fprint.push_back((fprint & 0x7F) + 1);
- fprint >>= 7;
- }
- return encoded_fprint;
+std::string MakeCorpusMapperFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
}
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
@@ -167,76 +164,275 @@ int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
return expiration_timestamp_ms;
}
+InitializeStatsProto::RecoveryCause GetRecoveryCause(
+ const DocumentLogCreator::CreateResult& create_result,
+ bool force_recovery_and_revalidate_documents) {
+ if (force_recovery_and_revalidate_documents) {
+ return InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC;
+ } else if (create_result.log_create_result.has_data_loss()) {
+ return InitializeStatsProto::DATA_LOSS;
+ } else if (create_result.preexisting_file_version !=
+ DocumentLogCreator::kCurrentVersion) {
+ return InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT;
+ }
+ return InitializeStatsProto::NONE;
+}
+
+InitializeStatsProto::DocumentStoreDataStatus GetDataStatus(
+ DataLoss data_loss) {
+ switch (data_loss) {
+ case DataLoss::PARTIAL:
+ return InitializeStatsProto::PARTIAL_LOSS;
+ case DataLoss::COMPLETE:
+ return InitializeStatsProto::COMPLETE_LOSS;
+ case DataLoss::NONE:
+ return InitializeStatsProto::NO_DATA_LOSS;
+ }
+}
+
+std::unordered_map<NamespaceId, std::string> GetNamespaceIdsToNamespaces(
+ const KeyMapper<NamespaceId>* key_mapper) {
+ std::unordered_map<NamespaceId, std::string> namespace_ids_to_namespaces;
+
+ std::unique_ptr<typename KeyMapper<NamespaceId>::Iterator> itr =
+ key_mapper->GetIterator();
+ while (itr->Advance()) {
+ namespace_ids_to_namespaces.insert(
+ {itr->GetValue(), std::string(itr->GetKey())});
+ }
+ return namespace_ids_to_namespaces;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<
+ KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>>>
+CreateUriMapper(const Filesystem& filesystem, const std::string& base_dir,
+ bool pre_mapping_fbv, bool use_persistent_hash_map) {
+ std::string uri_hash_mapper_working_path =
+ MakeUriHashMapperWorkingPath(base_dir);
+ // Due to a historical issue, we use the document store's base_dir directly as
+ // DynamicTrieKeyMapper's working directory for uri mapper.
+ // DynamicTrieKeyMapper also creates a subdirectory "key_mapper_dir", so the
+ // actual files will be put under "<base_dir>/key_mapper_dir/".
+ bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists(
+ absl_ports::StrCat(base_dir, "/key_mapper_dir").c_str());
+ bool persistent_hash_map_dir_exists =
+ filesystem.DirectoryExists(uri_hash_mapper_working_path.c_str());
+ if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) ||
+ (!use_persistent_hash_map && persistent_hash_map_dir_exists)) {
+ // Return a failure here so that the caller can properly delete and rebuild
+ // this component.
+ return absl_ports::FailedPreconditionError("Key mapper type mismatch");
+ }
+
+ if (use_persistent_hash_map) {
+ return PersistentHashMapKeyMapper<
+ DocumentId, fingerprint_util::FingerprintStringFormatter>::
+ Create(filesystem, std::move(uri_hash_mapper_working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/kUriHashKeyMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kUriHashKeyMapperKVByteSize);
+ } else {
+ return DynamicTrieKeyMapper<DocumentId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(filesystem, base_dir, kUriDynamicTrieKeyMapperMaxSize);
+ }
+}
+
} // namespace
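The mismatch check in CreateUriMapper above enumerates to this decision
table:

  // use_persistent_hash_map | trie dir exists | phm dir exists | outcome
  // true                    | yes             | any            | FAILED_PRECONDITION
  // false                   | any             | yes            | FAILED_PRECONDITION
  // true                    | no              | any            | PersistentHashMapKeyMapper
  // false                   | any             | no             | DynamicTrieKeyMapper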
+std::string DocumentStore::MakeFingerprint(
+ NamespaceId namespace_id, std::string_view namespace_,
+ std::string_view uri_or_schema) const {
+ if (!namespace_id_fingerprint_) {
+ // Using a 64-bit fingerprint to represent the key could lead to collisions.
+ // But, even with 200K unique keys, the probability of collision is about
+ // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
+ uint64_t fprint = tc3farmhash::Fingerprint64(
+ absl_ports::StrCat(namespace_, uri_or_schema));
+ return fingerprint_util::GetFingerprintString(fprint);
+ } else {
+ return NamespaceFingerprintIdentifier(namespace_id, uri_or_schema)
+ .EncodeToCString();
+ }
+}
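For reference, the encoding the removed inline code performed (and which
fingerprint_util::GetFingerprintString presumably still provides) maps each
7-bit group to a non-zero byte, since DynamicTrie keys cannot contain '\0':

  std::string EncodeFingerprint(uint64_t fprint) {
    std::string encoded;
    while (fprint) {
      encoded.push_back((fprint & 0x7F) + 1);  // values 1..128, never 0
      fprint >>= 7;
    }
    return encoded;  // an 8-byte fingerprint grows to at most 10 bytes
  }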
+
DocumentStore::DocumentStore(const Filesystem* filesystem,
const std::string_view base_dir,
const Clock* clock,
- const SchemaStore* schema_store)
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint,
+ bool pre_mapping_fbv, bool use_persistent_hash_map,
+ int32_t compression_level)
: filesystem_(filesystem),
base_dir_(base_dir),
clock_(*clock),
schema_store_(schema_store),
- document_validator_(schema_store) {}
+ document_validator_(schema_store),
+ namespace_id_fingerprint_(namespace_id_fingerprint),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ use_persistent_hash_map_(use_persistent_hash_map),
+ compression_level_(compression_level) {}
+
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
+ const DocumentProto& document, int32_t num_tokens,
+ PutDocumentStatsProto* put_document_stats) {
+ return Put(DocumentProto(document), num_tokens, put_document_stats);
+}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
- const DocumentProto& document) {
- return Put(DocumentProto(document));
+ DocumentProto&& document, int32_t num_tokens,
+ PutDocumentStatsProto* put_document_stats) {
+ document.mutable_internal_fields()->set_length_in_tokens(num_tokens);
+ return InternalPut(std::move(document), put_document_stats);
}
DocumentStore::~DocumentStore() {
if (initialized_) {
- if (!PersistToDisk().ok()) {
+ if (!PersistToDisk(PersistType::FULL).ok()) {
ICING_LOG(ERROR)
<< "Error persisting to disk in DocumentStore destructor";
}
}
}
-libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>>
-DocumentStore::Create(const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, const SchemaStore* schema_store) {
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store,
+ bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint,
+ bool pre_mapping_fbv, bool use_persistent_hash_map,
+ int32_t compression_level, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
- auto document_store = std::unique_ptr<DocumentStore>(
- new DocumentStore(filesystem, base_dir, clock, schema_store));
- ICING_RETURN_IF_ERROR(document_store->Initialize());
- return document_store;
+ auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
+ filesystem, base_dir, clock, schema_store, namespace_id_fingerprint,
+ pre_mapping_fbv, use_persistent_hash_map, compression_level));
+ ICING_ASSIGN_OR_RETURN(
+ InitializeResult initialize_result,
+ document_store->Initialize(force_recovery_and_revalidate_documents,
+ initialize_stats));
+
+ CreateResult create_result;
+ create_result.document_store = std::move(document_store);
+ create_result.data_loss = initialize_result.data_loss;
+ create_result.derived_files_regenerated =
+ initialize_result.derived_files_regenerated;
+ return create_result;
}
-libtextclassifier3::Status DocumentStore::Initialize() {
- auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem_, MakeDocumentLogFilename(base_dir_),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+/* static */ libtextclassifier3::Status DocumentStore::DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ // Header
+ const std::string header_filename = MakeHeaderFilename(base_dir);
+ if (!filesystem->DeleteFile(header_filename.c_str())) {
+ return absl_ports::InternalError("Couldn't delete header file");
+ }
+
+ // Document key mapper. Doesn't hurt to delete both dynamic trie and
+ // persistent hash map without checking.
+ ICING_RETURN_IF_ERROR(
+ DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem, base_dir));
+ ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<DocumentId>::Delete(
+ *filesystem, MakeUriHashMapperWorkingPath(base_dir)));
+
+ // Document id mapper
+ ICING_RETURN_IF_ERROR(FileBackedVector<int64_t>::Delete(
+ *filesystem, MakeDocumentIdMapperFilename(base_dir)));
+
+ // Document associated score cache
+ ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete(
+ *filesystem, MakeScoreCacheFilename(base_dir)));
+
+ // Filter cache
+ ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete(
+ *filesystem, MakeFilterCacheFilename(base_dir)));
+
+ // Namespace mapper
+ ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<NamespaceId>::Delete(
+ *filesystem, MakeNamespaceMapperFilename(base_dir)));
+
+ // Corpus mapper
+ ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<CorpusId>::Delete(
+ *filesystem, MakeCorpusMapperFilename(base_dir)));
+
+ // Corpus associated score cache
+ ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
+ *filesystem, MakeCorpusScoreCache(base_dir)));
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<DocumentStore::InitializeResult>
+DocumentStore::Initialize(bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
+ auto create_result_or =
+ DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_);
+
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
if (!create_result_or.ok()) {
ICING_LOG(ERROR) << create_result_or.status().error_message()
- << "\nFailed to initialize DocumentLog";
+ << "\nFailed to initialize DocumentLog.";
return create_result_or.status();
}
- FileBackedProtoLog<DocumentWrapper>::CreateResult create_result =
+ DocumentLogCreator::CreateResult create_result =
std::move(create_result_or).ValueOrDie();
- document_log_ = std::move(create_result.proto_log);
- if (create_result.data_loss) {
- ICING_LOG(WARNING)
- << "Data loss in document log, regenerating derived files.";
- libtextclassifier3::Status status = RegenerateDerivedFiles();
+ document_log_ = std::move(create_result.log_create_result.proto_log);
+ InitializeStatsProto::RecoveryCause recovery_cause =
+ GetRecoveryCause(create_result, force_recovery_and_revalidate_documents);
+
+ bool derived_files_regenerated = false;
+ if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) {
+ ICING_LOG(INFO) << "Starting Document Store Recovery with cause="
+ << recovery_cause << ", and create result { new_file="
+ << create_result.new_file << ", preexisting_file_version="
+ << create_result.preexisting_file_version << ", data_loss="
+ << create_result.log_create_result.data_loss
+ << "} and kCurrentVersion="
+ << DocumentLogCreator::kCurrentVersion;
+ // We can't rely on any existing derived files. Recreate them from scratch.
+ // Currently happens if:
+ // 1) This is a new log and we don't have derived files yet
+ // 2) Client wanted us to force a regeneration.
+ // 3) Log has some data loss, can't rely on existing derived data.
+ std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
+ libtextclassifier3::Status status =
+ RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
+ if (recovery_cause != InitializeStatsProto::NONE) {
+ // Only consider it a recovery if the client forced a recovery or there
+ // was data loss. Otherwise, this could just be the first time we're
+ // initializing and generating derived files.
+ derived_files_regenerated = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_latency_ms(
+ document_recovery_timer->GetElapsedMilliseconds());
+ initialize_stats->set_document_store_recovery_cause(recovery_cause);
+ initialize_stats->set_document_store_data_status(
+ GetDataStatus(create_result.log_create_result.data_loss));
+ }
+ }
if (!status.ok()) {
ICING_LOG(ERROR)
<< "Failed to regenerate derived files for DocumentStore";
return status;
}
} else {
- if (!InitializeDerivedFiles().ok()) {
- ICING_VLOG(1)
+ if (!InitializeExistingDerivedFiles().ok()) {
+ ICING_LOG(WARNING)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
- libtextclassifier3::Status status = RegenerateDerivedFiles();
+ std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
+ derived_files_regenerated = true;
+ libtextclassifier3::Status status = RegenerateDerivedFiles(
+ /*force_recovery_and_revalidate_documents=*/false);
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::IO_ERROR);
+ initialize_stats->set_document_store_recovery_latency_ms(
+ document_recovery_timer->GetElapsedMilliseconds());
+ }
if (!status.ok()) {
ICING_LOG(ERROR)
<< "Failed to regenerate derived files for DocumentStore";
@@ -246,11 +442,17 @@ libtextclassifier3::Status DocumentStore::Initialize() {
}
initialized_ = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_num_documents(document_id_mapper_->num_elements());
+ }
- return libtextclassifier3::Status::OK;
+ InitializeResult initialize_result = {
+ .data_loss = create_result.log_create_result.data_loss,
+ .derived_files_regenerated = derived_files_regenerated};
+ return initialize_result;
}
-libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
+libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
if (!HeaderExists()) {
// Without a header, we don't know if things are consistent between each
// other so the caller should just regenerate everything from ground
@@ -265,15 +467,16 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
}
- if (header.magic != DocumentStore::Header::kMagic) {
+ if (header.magic !=
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_)) {
return absl_ports::InternalError(absl_ports::StrCat(
"Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
}
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or =
- KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize);
+ auto document_key_mapper_or = CreateUriMapper(
+ *filesystem_, base_dir_, pre_mapping_fbv_, use_persistent_hash_map_);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to initialize KeyMapper";
@@ -305,9 +508,32 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
ICING_ASSIGN_OR_RETURN(
namespace_mapper_,
- KeyMapper<NamespaceId>::Create(*filesystem_,
- MakeNamespaceMapperFilename(base_dir_),
- kNamespaceMapperMaxSize));
+ DynamicTrieKeyMapper<NamespaceId>::Create(
+ *filesystem_, MakeNamespaceMapperFilename(base_dir_),
+ kNamespaceMapperMaxSize));
+
+ ICING_ASSIGN_OR_RETURN(
+ usage_store_,
+ UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+
+ auto corpus_mapper_or =
+ DynamicTrieKeyMapper<CorpusId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(*filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize);
+ if (!corpus_mapper_or.ok()) {
+ return std::move(corpus_mapper_or).status();
+ }
+ corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie();
+
+ ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
+ FileBackedVector<CorpusAssociatedScoreData>::Create(
+ *filesystem_, MakeCorpusScoreCache(base_dir_),
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+
+ // Ensure the usage store is the correct size.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->TruncateTo(document_id_mapper_->num_elements()));
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
if (checksum.Get() != header.checksum) {
@@ -318,137 +544,128 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
+libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
+ bool revalidate_documents) {
ICING_RETURN_IF_ERROR(ResetDocumentKeyMapper());
ICING_RETURN_IF_ERROR(ResetDocumentIdMapper());
ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
ICING_RETURN_IF_ERROR(ResetFilterCache());
ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
+ ICING_RETURN_IF_ERROR(ResetCorpusMapper());
+ ICING_RETURN_IF_ERROR(ResetCorpusAssociatedScoreCache());
+
+ // Creates a new UsageStore instance. Note that we don't reset the data in
+ // usage store here because we're not able to regenerate the usage scores.
+ ICING_ASSIGN_OR_RETURN(
+ usage_store_,
+ UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
// Iterates through document log
auto iterator = document_log_->GetIterator();
auto iterator_status = iterator.Advance();
+ libtextclassifier3::StatusOr<int64_t> element_size =
+ document_log_->GetElementsFileSize();
+ libtextclassifier3::StatusOr<int64_t> disk_usage =
+ document_log_->GetDiskUsage();
+ if (element_size.ok() && disk_usage.ok()) {
+ ICING_VLOG(1) << "Starting recovery of document store. Document store "
+ "elements file size:"
+ << element_size.ValueOrDie()
+ << ", disk usage=" << disk_usage.ValueOrDie();
+ }
while (iterator_status.ok()) {
- ICING_ASSIGN_OR_RETURN(DocumentWrapper document_wrapper,
- document_log_->ReadProto(iterator.GetOffset()));
- if (document_wrapper.deleted()) {
- if (!document_wrapper.document().uri().empty()) {
- // Individual document deletion.
- auto document_id_or =
- GetDocumentId(document_wrapper.document().namespace_(),
- document_wrapper.document().uri());
- // Updates document_id mapper with deletion
- if (document_id_or.ok()) {
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
- document_id_or.ValueOrDie(), kDocDeletedFlag));
- } else if (!absl_ports::IsNotFound(document_id_or.status())) {
- // Real error
- return absl_ports::Annotate(
- document_id_or.status(),
- absl_ports::StrCat("Failed to find document id. namespace: ",
- document_wrapper.document().namespace_(),
- ", uri: ", document_wrapper.document().uri()));
- }
- } else if (!document_wrapper.document().namespace_().empty()) {
- // Namespace deletion.
- ICING_RETURN_IF_ERROR(UpdateDerivedFilesNamespaceDeleted(
- document_wrapper.document().namespace_()));
-
- } else if (!document_wrapper.document().schema().empty()) {
- // SchemaType deletion.
- auto schema_type_id_or = schema_store_->GetSchemaTypeId(
- document_wrapper.document().schema());
-
- if (schema_type_id_or.ok()) {
- ICING_RETURN_IF_ERROR(UpdateDerivedFilesSchemaTypeDeleted(
- schema_type_id_or.ValueOrDie()));
- } else {
- // The deleted schema type doesn't have a SchemaTypeId we can refer
- // to in the FilterCache.
- //
- // TODO(cassiewang): We could avoid reading out all the documents.
- // When we see a schema type doesn't have a SchemaTypeId, assign the
- // unknown schema type a unique, temporary SchemaTypeId and store
- // that in the FilterCache. Then, when we see the schema type
- // tombstone here, we can look up its temporary SchemaTypeId and
- // just iterate through the FilterCache to mark those documents as
- // deleted.
- int size = document_id_mapper_->num_elements();
- for (DocumentId document_id = 0; document_id < size; document_id++) {
- auto document_or = Get(document_id);
- if (absl_ports::IsNotFound(document_or.status())) {
- // Skip nonexistent documents
- continue;
- } else if (!document_or.ok()) {
- // Real error, pass up
- return absl_ports::Annotate(
- document_or.status(),
- IcingStringUtil::StringPrintf(
- "Failed to retrieve Document for DocumentId %d",
- document_id));
- }
-
- // Guaranteed to have a document now.
- DocumentProto document = document_or.ValueOrDie();
-
- if (document.schema() == document_wrapper.document().schema()) {
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- }
- }
- }
- } else {
- return absl_ports::InternalError(
- "Encountered an invalid tombstone during recovery!");
- }
- } else {
- // Updates key mapper and document_id mapper with the new document
- DocumentId new_document_id = document_id_mapper_->num_elements();
- ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
- MakeFingerprint(document_wrapper.document().namespace_(),
- document_wrapper.document().uri()),
- new_document_id));
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
-
- ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
- new_document_id,
- DocumentAssociatedScoreData(
- document_wrapper.document().score(),
- document_wrapper.document().creation_timestamp_ms())));
-
- SchemaTypeId schema_type_id;
- auto schema_type_id_or =
- schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
- if (absl_ports::IsNotFound(schema_type_id_or.status())) {
- // Didn't find a SchemaTypeId. This means that the DocumentStore and
- // the SchemaStore are out of sync. But DocumentStore can't do
- // anything about it so just ignore this for now. This should be
- // detected/handled by the owner of DocumentStore. Set it to some
- // arbitrary invalid value for now, it'll get updated to the correct
- // ID later.
- schema_type_id = -1;
- } else if (!schema_type_id_or.ok()) {
- // Real error. Pass it up
- return schema_type_id_or.status();
- } else {
- // We're guaranteed that SchemaTypeId is valid now
- schema_type_id = schema_type_id_or.ValueOrDie();
- }
+ ICING_VLOG(2) << "Attempting to read document at offset="
+ << iterator.GetOffset();
+ libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
+ document_log_->ReadProto(iterator.GetOffset());
- ICING_ASSIGN_OR_RETURN(
- NamespaceId namespace_id,
- namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
- namespace_mapper_->num_keys()));
+ if (absl_ports::IsNotFound(document_wrapper_or.status())) {
+ // The erased document still occupies 1 document id.
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id));
+ iterator_status = iterator.Advance();
+ continue;
+ } else if (!document_wrapper_or.ok()) {
+ return document_wrapper_or.status();
+ }
- int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
- document_wrapper.document().creation_timestamp_ms(),
- document_wrapper.document().ttl_ms());
+ DocumentWrapper document_wrapper =
+ std::move(document_wrapper_or).ValueOrDie();
+ // Revalidate that this document is still compatible if requested.
+ if (revalidate_documents) {
+ if (!document_validator_.Validate(document_wrapper.document()).ok()) {
+ // Document is no longer valid with the current schema. Mark it as
+ // deleted.
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(document_log_->EraseProto(iterator.GetOffset()));
+ ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id));
+ iterator_status = iterator.Advance();
+ continue;
+ }
+ }
- ICING_RETURN_IF_ERROR(UpdateFilterCache(
- new_document_id, DocumentFilterData(namespace_id, schema_type_id,
- expiration_timestamp_ms)));
+ ICING_ASSIGN_OR_RETURN(
+ NamespaceId namespace_id,
+ namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
+ namespace_mapper_->num_keys()));
+
+ // Updates key mapper and document_id mapper with the new document
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+ MakeFingerprint(namespace_id, document_wrapper.document().namespace_(),
+ document_wrapper.document().uri()),
+ new_document_id));
+ ICING_RETURN_IF_ERROR(
+ document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
+
+ SchemaTypeId schema_type_id;
+ auto schema_type_id_or =
+ schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
+ if (absl_ports::IsNotFound(schema_type_id_or.status())) {
+ // Didn't find a SchemaTypeId. This means that the DocumentStore and
+ // the SchemaStore are out of sync. But DocumentStore can't do
+ // anything about it so just ignore this for now. This should be
+ // detected/handled by the owner of DocumentStore. Set it to some
+ // arbitrary invalid value for now, it'll get updated to the correct
+ // ID later.
+ schema_type_id = -1;
+ } else if (!schema_type_id_or.ok()) {
+ // Real error. Pass it up
+ return schema_type_id_or.status();
+ } else {
+ // We're guaranteed that SchemaTypeId is valid now
+ schema_type_id = schema_type_id_or.ValueOrDie();
}
+
+ // Update corpus maps
+ std::string corpus =
+ MakeFingerprint(namespace_id, document_wrapper.document().namespace_(),
+ document_wrapper.document().schema());
+ ICING_ASSIGN_OR_RETURN(
+ CorpusId corpusId,
+ corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys()));
+
+ ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
+ GetCorpusAssociatedScoreDataToUpdate(corpusId));
+ scoring_data.AddDocument(
+ document_wrapper.document().internal_fields().length_in_tokens());
+
+ ICING_RETURN_IF_ERROR(
+ UpdateCorpusAssociatedScoreCache(corpusId, scoring_data));
+
+ ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+ new_document_id,
+ DocumentAssociatedScoreData(
+ corpusId, document_wrapper.document().score(),
+ document_wrapper.document().creation_timestamp_ms(),
+ document_wrapper.document().internal_fields().length_in_tokens())));
+
+ int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
+ document_wrapper.document().creation_timestamp_ms(),
+ document_wrapper.document().ttl_ms());
+
+ ICING_RETURN_IF_ERROR(UpdateFilterCache(
+ new_document_id, DocumentFilterData(namespace_id, schema_type_id,
+ expiration_timestamp_ms)));
iterator_status = iterator.Advance();
}
@@ -460,6 +677,10 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
"Failed to iterate through proto log.");
}
+ // Shrink usage_store_ to the correct size.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->TruncateTo(document_id_mapper_->num_elements()));
+
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -468,22 +689,33 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
}
libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
+ // Only one type of KeyMapper (either DynamicTrieKeyMapper or
+ // PersistentHashMapKeyMapper) will actually exist at any moment, but it is ok
+ // to call Delete() for both since Delete() returns OK if any of them doesn't
+ // exist.
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
document_key_mapper_.reset();
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status =
- KeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
+ DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete old key mapper";
+ << "Failed to delete old dynamic trie key mapper";
+ return status;
+ }
+ status = PersistentHashMapKeyMapper<DocumentId>::Delete(
+ *filesystem_, MakeUriHashMapperWorkingPath(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete old persistent hash map key mapper";
return status;
}
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or =
- KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize);
+ auto document_key_mapper_or = CreateUriMapper(
+ *filesystem_, base_dir_, pre_mapping_fbv_, use_persistent_hash_map_);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to re-init key mapper";
@@ -496,7 +728,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() {
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
document_id_mapper_.reset();
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = FileBackedVector<int64_t>::Delete(
*filesystem_, MakeDocumentIdMapperFilename(base_dir_));
@@ -505,7 +737,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() {
<< "Failed to delete old document_id mapper";
return status;
}
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
auto document_id_mapper_or = FileBackedVector<int64_t>::Create(
*filesystem_, MakeDocumentIdMapperFilename(base_dir_),
@@ -531,6 +763,18 @@ libtextclassifier3::Status DocumentStore::ResetDocumentAssociatedScoreCache() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status DocumentStore::ResetCorpusAssociatedScoreCache() {
+ // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+ corpus_score_cache_.reset();
+ ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
+ *filesystem_, MakeCorpusScoreCache(base_dir_)));
+ ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
+ FileBackedVector<CorpusAssociatedScoreData>::Create(
+ *filesystem_, MakeCorpusScoreCache(base_dir_),
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::Status DocumentStore::ResetFilterCache() {
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
filter_cache_.reset();
@@ -546,9 +790,9 @@ libtextclassifier3::Status DocumentStore::ResetFilterCache() {
libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
namespace_mapper_.reset();
- // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status = KeyMapper<NamespaceId>::Delete(
+ libtextclassifier3::Status status = DynamicTrieKeyMapper<NamespaceId>::Delete(
*filesystem_, MakeNamespaceMapperFilename(base_dir_));
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
@@ -557,9 +801,33 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
}
ICING_ASSIGN_OR_RETURN(
namespace_mapper_,
- KeyMapper<NamespaceId>::Create(*filesystem_,
- MakeNamespaceMapperFilename(base_dir_),
- kNamespaceMapperMaxSize));
+ DynamicTrieKeyMapper<NamespaceId>::Create(
+ *filesystem_, MakeNamespaceMapperFilename(base_dir_),
+ kNamespaceMapperMaxSize));
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status DocumentStore::ResetCorpusMapper() {
+ // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+ corpus_mapper_.reset();
+ // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = DynamicTrieKeyMapper<CorpusId>::Delete(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete old corpus_id mapper";
+ return status;
+ }
+ auto corpus_mapper_or =
+ DynamicTrieKeyMapper<CorpusId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(*filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize);
+ if (!corpus_mapper_or.ok()) {
+ return std::move(corpus_mapper_or).status();
+ }
+ corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie();
return libtextclassifier3::Status::OK;
}
@@ -576,7 +844,15 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
}
Crc32 document_log_checksum = std::move(checksum_or).ValueOrDie();
- Crc32 document_key_mapper_checksum = document_key_mapper_->ComputeChecksum();
+ // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // that can support error logging.
+ checksum_or = document_key_mapper_->ComputeChecksum();
+ if (!checksum_or.ok()) {
+ ICING_LOG(ERROR) << checksum_or.status().error_message()
+ << "Failed to compute checksum of DocumentKeyMapper";
+ return checksum_or.status();
+ }
+ Crc32 document_key_mapper_checksum = std::move(checksum_or).ValueOrDie();
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
@@ -608,7 +884,40 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
}
Crc32 filter_cache_checksum = std::move(checksum_or).ValueOrDie();
- Crc32 namespace_mapper_checksum = namespace_mapper_->ComputeChecksum();
+ // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // that can support error logging.
+ checksum_or = namespace_mapper_->ComputeChecksum();
+ if (!checksum_or.ok()) {
+ ICING_LOG(ERROR) << checksum_or.status().error_message()
+ << "Failed to compute checksum of namespace mapper";
+ return checksum_or.status();
+ }
+ Crc32 namespace_mapper_checksum = std::move(checksum_or).ValueOrDie();
+
+ // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // that can support error logging.
+ checksum_or = corpus_mapper_->ComputeChecksum();
+ if (!checksum_or.ok()) {
+ ICING_LOG(ERROR) << checksum_or.status().error_message()
+ << "Failed to compute checksum of corpus mapper";
+ return checksum_or.status();
+ }
+ Crc32 corpus_mapper_checksum = std::move(checksum_or).ValueOrDie();
+
+ // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // that can support error logging.
+ checksum_or = corpus_score_cache_->ComputeChecksum();
+ if (!checksum_or.ok()) {
+ ICING_LOG(ERROR) << checksum_or.status().error_message()
+ << "Failed to compute checksum of corpus score cache";
+ return checksum_or.status();
+ }
+ Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
+
+ // NOTE: We purposely don't include usage_store checksum here because we can't
+ // regenerate it from ground truth documents. If it gets corrupted, we'll just
+ // clear all usage reports, but we shouldn't throw everything else in the
+ // document store out.
total_checksum.Append(std::to_string(document_log_checksum.Get()));
total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
@@ -616,6 +925,8 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
total_checksum.Append(std::to_string(score_cache_checksum.Get()));
total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
+ total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
+ total_checksum.Append(std::to_string(corpus_score_cache_checksum.Get()));
return total_checksum;
}
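How the total checksum composes, in miniature (a sketch; Crc32 is icing's
wrapper, and Append extends the running CRC over the given bytes; the
component names are hypothetical):

  Crc32 total;
  for (const Crc32& component :
       {log_crc, key_mapper_crc, id_mapper_crc, score_crc, filter_crc,
        namespace_crc, corpus_crc, corpus_score_crc}) {
    total.Append(std::to_string(component.Get()));
  }
  // Order matters, and any single component change perturbs the total,
  // which is what lets Initialize detect inconsistent derived files.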
@@ -635,27 +946,37 @@ bool DocumentStore::HeaderExists() {
libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
// Write the header
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic =
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_);
header.checksum = checksum.Get();
// This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
+ ScopedFd sfd(
+ filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
+ if (!sfd.is_valid() ||
+ !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
+ !filesystem_->DataSync(sfd.get())) {
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to write DocStore header: ", MakeHeaderFilename(base_dir_)));
}
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
- DocumentProto&& document) {
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
+ DocumentProto&& document, PutDocumentStatsProto* put_document_stats) {
+ std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_document_size(document.ByteSizeLong());
+ }
+
// Copy fields needed before they are moved
std::string name_space = document.namespace_();
std::string uri = document.uri();
std::string schema = document.schema();
int document_score = document.score();
+ int32_t length_in_tokens = document.internal_fields().length_in_tokens();
int64_t creation_timestamp_ms = document.creation_timestamp_ms();
// Sets the creation timestamp if caller hasn't specified.
@@ -688,19 +1009,40 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
// Creates a new document id, updates key mapper and document_id mapper
DocumentId new_document_id = document_id_mapper_->num_elements();
- ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
- MakeFingerprint(name_space, uri), new_document_id));
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
-
- ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
- new_document_id,
- DocumentAssociatedScoreData(document_score, creation_timestamp_ms)));
+ if (!IsDocumentIdValid(new_document_id)) {
+ return absl_ports::ResourceExhaustedError(
+ "Exceeded maximum number of documents. Try calling Optimize to reclaim "
+ "some space.");
+ }
// Update namespace maps
ICING_ASSIGN_OR_RETURN(
NamespaceId namespace_id,
namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
+ // Updates key mapper and document_id mapper
+ ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+ MakeFingerprint(namespace_id, name_space, uri), new_document_id));
+ ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
+
+ // Update corpus maps
+ ICING_ASSIGN_OR_RETURN(CorpusId corpusId,
+ corpus_mapper_->GetOrPut(
+ MakeFingerprint(namespace_id, name_space, schema),
+ corpus_mapper_->num_keys()));
+
+ ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
+ GetCorpusAssociatedScoreDataToUpdate(corpusId));
+ scoring_data.AddDocument(length_in_tokens);
+
+ ICING_RETURN_IF_ERROR(
+ UpdateCorpusAssociatedScoreCache(corpusId, scoring_data));
+
+ ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+ new_document_id,
+ DocumentAssociatedScoreData(corpusId, document_score,
+ creation_timestamp_ms, length_in_tokens)));
+
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
schema_store_->GetSchemaTypeId(schema));
@@ -709,21 +1051,40 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
expiration_timestamp_ms)));
if (old_document_id_or.ok()) {
- // Mark the old document id as deleted.
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
- old_document_id_or.ValueOrDie(), kDocDeletedFlag));
+ // The old document exists, copy over the usage scores and delete the old
+ // document.
+ DocumentId old_document_id = old_document_id_or.ValueOrDie();
+
+ ICING_RETURN_IF_ERROR(
+ usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
+ /*to_document_id=*/new_document_id));
+
+ // Delete the old document. It's fine if it's not found since it might have
+ // been deleted previously.
+ auto delete_status =
+ Delete(old_document_id, clock_.GetSystemTimeMilliseconds());
+ if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
+ // Real error, pass it up.
+ return delete_status;
+ }
+ }
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_document_store_latency_ms(
+ put_timer->GetElapsedMilliseconds());
}
return new_document_id;
}
libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
- const std::string_view name_space, const std::string_view uri) const {
+ const std::string_view name_space, const std::string_view uri,
+ bool clear_internal_fields) const {
// TODO(b/147231617): Make a better way to replace the error message in an
// existing Status.
auto document_id_or = GetDocumentId(name_space, uri);
if (absl_ports::IsNotFound(document_id_or.status())) {
- ICING_LOG(ERROR) << document_id_or.status().error_message();
+ ICING_VLOG(1) << document_id_or.status().error_message();
return libtextclassifier3::Status(
document_id_or.status().CanonicalCode(),
IcingStringUtil::StringPrintf("Document (%s, %s) not found.",
@@ -745,9 +1106,30 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
}
libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
- DocumentId document_id) const {
- ICING_ASSIGN_OR_RETURN(int64_t document_log_offset,
- DoesDocumentExistAndGetFileOffset(document_id));
+ DocumentId document_id, bool clear_internal_fields) const {
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ auto document_filter_data_optional =
+ GetAliveDocumentFilterData(document_id, current_time_ms);
+ if (!document_filter_data_optional) {
+ // The document doesn't exist. Return InvalidArgumentError if the document
+ // id itself is invalid; otherwise return NOT_FOUND.
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id '%d' invalid.", document_id));
+ }
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Document id '%d' doesn't exist", document_id));
+ }
+
+ auto document_log_offset_or = document_id_mapper_->Get(document_id);
+ if (!document_log_offset_or.ok()) {
+ // We've just checked above that document_id is valid, so this lookup
+ // should never fail.
+ return absl_ports::InternalError("Failed to find document offset.");
+ }
+ int64_t document_log_offset = *document_log_offset_or.ValueOrDie();
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
@@ -759,30 +1141,57 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
}
DocumentWrapper document_wrapper =
std::move(document_wrapper_or).ValueOrDie();
+ if (clear_internal_fields) {
+ document_wrapper.mutable_document()->clear_internal_fields();
+ }
return std::move(*document_wrapper.mutable_document());
}
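+
+// Example (illustrative sketch only): internal fields such as
+// length_in_tokens are stripped by default; internal callers (e.g.
+// OptimizeInto below) pass clear_internal_fields=false to keep them.
+//
+//   ICING_ASSIGN_OR_RETURN(DocumentProto doc,
+//                          store->Get(id, /*clear_internal_fields=*/false));
+//   int32_t num_tokens = doc.internal_fields().length_in_tokens();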
libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
const std::string_view name_space, const std::string_view uri) const {
- auto document_id_or =
- document_key_mapper_->Get(MakeFingerprint(name_space, uri));
- if (!document_id_or.ok()) {
- return absl_ports::Annotate(
- document_id_or.status(),
- absl_ports::StrCat("Failed to find DocumentId by key: ", name_space,
- ", ", uri));
+ auto namespace_id_or = namespace_mapper_->Get(name_space);
+ libtextclassifier3::Status status = namespace_id_or.status();
+ if (status.ok()) {
+ NamespaceId namespace_id = namespace_id_or.ValueOrDie();
+ auto document_id_or = document_key_mapper_->Get(
+ MakeFingerprint(namespace_id, name_space, uri));
+ status = document_id_or.status();
+ if (status.ok()) {
+ // Guaranteed to have a DocumentId now
+ return document_id_or.ValueOrDie();
+ }
+ }
+ return absl_ports::Annotate(
+ status, absl_ports::StrCat(
+ "Failed to find DocumentId by key: ", name_space, ", ", uri));
+}
+
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
+ const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier)
+ const {
+ if (!namespace_id_fingerprint_) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot lookup document id by namespace id + fingerprint without "
+ "enabling it on uri_mapper");
}
- // Guaranteed to have a DocumentId now
- return document_id_or.ValueOrDie();
+ auto document_id_or = document_key_mapper_->Get(
+ namespace_fingerprint_identifier.EncodeToCString());
+ if (document_id_or.ok()) {
+ return document_id_or.ValueOrDie();
+ }
+ return absl_ports::Annotate(
+ std::move(document_id_or).status(),
+ "Failed to find DocumentId by namespace id + fingerprint");
}
std::vector<std::string> DocumentStore::GetAllNamespaces() const {
std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
- namespace_mapper_->GetValuesToKeys();
+ GetNamespaceIdsToNamespaces(namespace_mapper_.get());
std::unordered_set<NamespaceId> existing_namespace_ids;
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
++document_id) {
// filter_cache_->Get can only fail if document_id is < 0
@@ -795,7 +1204,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
}
const DocumentFilterData* data = status_or_data.ValueOrDie();
- if (DoesDocumentExist(document_id)) {
+ if (GetAliveDocumentFilterData(document_id, current_time_ms)) {
existing_namespace_ids.insert(data->namespace_id());
}
}
@@ -808,44 +1217,54 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const {
return existing_namespaces;
}
-libtextclassifier3::StatusOr<int64_t>
-DocumentStore::DoesDocumentExistAndGetFileOffset(DocumentId document_id) const {
- if (!IsDocumentIdValid(document_id)) {
- return absl_ports::InvalidArgumentError(
- IcingStringUtil::StringPrintf("DocumentId %d is invalid", document_id));
+std::optional<DocumentFilterData> DocumentStore::GetAliveDocumentFilterData(
+ DocumentId document_id, int64_t current_time_ms) const {
+ if (IsDeleted(document_id)) {
+ return std::nullopt;
}
+ return GetNonExpiredDocumentFilterData(document_id, current_time_ms);
+}
+bool DocumentStore::IsDeleted(DocumentId document_id) const {
auto file_offset_or = document_id_mapper_->Get(document_id);
-
- bool deleted =
- file_offset_or.ok() && *file_offset_or.ValueOrDie() == kDocDeletedFlag;
- if (deleted || absl_ports::IsOutOfRange(file_offset_or.status())) {
- // Document has been deleted or doesn't exist
- return absl_ports::NotFoundError(
- IcingStringUtil::StringPrintf("Document %d not found", document_id));
+ if (!file_offset_or.ok()) {
+ // This can only happen if document_id is out of range of the
+ // document_id_mapper, i.e. we got an invalid document_id. Callers should
+ // already have validated their document_id. Regardless, return true since
+ // the document doesn't exist.
+ return true;
}
+ int64_t file_offset = *file_offset_or.ValueOrDie();
+ return file_offset == kDocDeletedFlag;
+}
- ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
- filter_cache_->Get(document_id));
- if (clock_.GetSystemTimeMilliseconds() >=
- filter_data->expiration_timestamp_ms()) {
- // Past the expiration time, so also return NOT FOUND since it *shouldn't*
- // exist anymore.
- return absl_ports::NotFoundError(
- IcingStringUtil::StringPrintf("Document %d not found", document_id));
+// Returns DocumentFilterData if the document is not expired. Otherwise,
+// std::nullopt.
+std::optional<DocumentFilterData>
+DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id,
+ int64_t current_time_ms) const {
+ auto filter_data_or = filter_cache_->GetCopy(document_id);
+ if (!filter_data_or.ok()) {
+ // This can only happen if document_id is out of range of the
+ // filter_cache, i.e. we got an invalid document_id. Callers should
+ // already have validated their document_id. Regardless, return
+ // std::nullopt since the document doesn't exist.
+ return std::nullopt;
}
+ DocumentFilterData document_filter_data = filter_data_or.ValueOrDie();
- ICING_RETURN_IF_ERROR(file_offset_or.status());
- return *file_offset_or.ValueOrDie();
-}
-
-bool DocumentStore::DoesDocumentExist(DocumentId document_id) const {
- // If we can successfully get the document log offset, the document exists.
- return DoesDocumentExistAndGetFileOffset(document_id).ok();
+ // Check if it's past the expiration time
+ if (current_time_ms >= document_filter_data.expiration_timestamp_ms()) {
+ return std::nullopt;
+ }
+ return document_filter_data;
}
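+
+// To summarize the liveness checks above (a descriptive note, not new
+// behavior), a document id resolves to one of three states:
+//   deleted: document_id_mapper_ holds kDocDeletedFlag (or the id is out of
+//            range), so IsDeleted() returns true.
+//   expired: filter data exists but current_time_ms >= its
+//            expiration_timestamp_ms, so GetNonExpiredDocumentFilterData()
+//            returns std::nullopt.
+//   alive:   GetAliveDocumentFilterData() returns the DocumentFilterData.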
libtextclassifier3::Status DocumentStore::Delete(
- const std::string_view name_space, const std::string_view uri) {
+ const std::string_view name_space, const std::string_view uri,
+ int64_t current_time_ms) {
// Try to get the DocumentId first
auto document_id_or = GetDocumentId(name_space, uri);
if (!document_id_or.ok()) {
@@ -854,36 +1273,33 @@ libtextclassifier3::Status DocumentStore::Delete(
absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
", uri: ", uri));
}
+ return Delete(document_id_or.ValueOrDie(), current_time_ms);
+}
- // Check if the DocumentId's Document still exists.
- DocumentId document_id = document_id_or.ValueOrDie();
- auto file_offset_or = DoesDocumentExistAndGetFileOffset(document_id);
- if (!file_offset_or.ok()) {
- return absl_ports::Annotate(
- file_offset_or.status(),
- absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
- ", uri: ", uri));
+libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id,
+ int64_t current_time_ms) {
+ auto document_filter_data_optional =
+ GetAliveDocumentFilterData(document_id, current_time_ms);
+ if (!document_filter_data_optional) {
+ // The document doesn't exist. Return InvalidArgumentError if the document
+ // id itself is invalid; otherwise return NOT_FOUND.
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id '%d' invalid.", document_id));
+ }
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Document id '%d' doesn't exist", document_id));
}
- // Update ground truth first.
- // To delete a proto we don't directly remove it. Instead, we mark it as
- // deleted first by appending a tombstone of it and actually remove it from
- // file later in Optimize()
- // TODO(b/144458732): Implement a more robust version of ICING_RETURN_IF_ERROR
- // that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateDocumentTombstone(name_space, uri))
- .status();
- if (!status.ok()) {
- return absl_ports::Annotate(
- status, absl_ports::StrCat("Failed to delete Document. namespace: ",
- name_space, ", uri: ", uri));
+ auto document_log_offset_or = document_id_mapper_->Get(document_id);
+ if (!document_log_offset_or.ok()) {
+ return absl_ports::InternalError("Failed to find document offset.");
}
+ int64_t document_log_offset = *document_log_offset_or.ValueOrDie();
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id_or.ValueOrDie(), kDocDeletedFlag));
-
- return libtextclassifier3::Status::OK;
+ // Erases the document proto.
+ ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset));
+ return ClearDerivedData(document_id);
}
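+
+// Example (hypothetical): deleting the same id twice surfaces NOT_FOUND on
+// the second call, which callers such as Put() above deliberately tolerate.
+//
+//   int64_t now = clock.GetSystemTimeMilliseconds();
+//   ICING_RETURN_IF_ERROR(store->Delete(id, now));
+//   auto status = store->Delete(id, now);  // absl_ports::IsNotFound(status)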
libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId(
@@ -891,163 +1307,269 @@ libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId(
return namespace_mapper_->Get(name_space);
}
+libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
+ const std::string_view name_space, const std::string_view schema) const {
+ ICING_ASSIGN_OR_RETURN(NamespaceId namespace_id,
+ namespace_mapper_->Get(name_space));
+ return corpus_mapper_->Get(MakeFingerprint(namespace_id, name_space, schema));
+}
+
+libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId(
+ ResultSpecProto::ResultGroupingType result_group_type,
+ const std::string_view name_space, const std::string_view schema) const {
+ auto namespace_id_or = GetNamespaceId(name_space);
+ auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema);
+ switch (result_group_type) {
+ case ResultSpecProto::NONE:
+ return absl_ports::InvalidArgumentError(
+ "Cannot group by ResultSpecProto::NONE");
+ case ResultSpecProto::SCHEMA_TYPE:
+ if (schema_type_id_or.ok()) {
+ return schema_type_id_or.ValueOrDie();
+ }
+ break;
+ case ResultSpecProto::NAMESPACE:
+ if (namespace_id_or.ok()) {
+ return namespace_id_or.ValueOrDie();
+ }
+ break;
+ case ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE:
+ if (namespace_id.ok() && schema_type_id.ok()) {
+ // TODO(b/258715421): Temporary workaround to get a
+ // ResultGroupingEntryId given the Namespace string
+ // and Schema string.
+ return namespace_id_or.ValueOrDie() << 16 | schema_type_id_or.ValueOrDie();
+ }
+ break;
+ }
+ return absl_ports::NotFoundError("Cannot generate ResultGrouping Entry Id");
+}
+
+libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId(
+ ResultSpecProto::ResultGroupingType result_group_type,
+ const NamespaceId namespace_id, const SchemaTypeId schema_type_id) const {
+ switch (result_group_type) {
+ case ResultSpecProto::NONE:
+ return absl_ports::InvalidArgumentError(
+ "Cannot group by ResultSpecProto::NONE");
+ case ResultSpecProto::SCHEMA_TYPE:
+ return schema_type_id;
+ case ResultSpecProto::NAMESPACE:
+ return namespace_id;
+ case ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE:
+ // TODO(b/258715421): Temporary workaround to get a ResultGroupingEntryId
+ // given the Namespace Id and SchemaType Id.
+ return namespace_id << 16 | schema_type_id;
+ }
+ return absl_ports::NotFoundError("Cannot generate ResultGrouping Entry Id");
+}
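+
+// Example (sketch of the packing above): with namespace_id = 3 and
+// schema_type_id = 5, NAMESPACE_AND_SCHEMA_TYPE yields
+// (3 << 16) | 5 = 0x00030005. Ids wider than 16 bits would collide, which is
+// why the TODO above calls this a temporary workaround.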
+
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
- auto score_data_or = score_cache_->Get(document_id);
+ auto score_data_or = score_cache_->GetCopy(document_id);
if (!score_data_or.ok()) {
ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
<< " from score_cache_";
- return score_data_or.status();
+ return absl_ports::NotFoundError(
+ std::move(score_data_or).status().error_message());
}
- return *std::move(score_data_or).ValueOrDie();
+
+ DocumentAssociatedScoreData document_associated_score_data =
+ std::move(score_data_or).ValueOrDie();
+ return document_associated_score_data;
}
-libtextclassifier3::StatusOr<DocumentFilterData>
-DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
- auto filter_data_or = filter_cache_->Get(document_id);
- if (!filter_data_or.ok()) {
- ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
- << " from filter_cache_";
- return filter_data_or.status();
+libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+DocumentStore::GetCorpusAssociatedScoreData(CorpusId corpus_id) const {
+ auto score_data_or = corpus_score_cache_->GetCopy(corpus_id);
+ if (!score_data_or.ok()) {
+ return score_data_or.status();
}
- return *std::move(filter_data_or).ValueOrDie();
+
+ CorpusAssociatedScoreData corpus_associated_score_data =
+ std::move(score_data_or).ValueOrDie();
+ return corpus_associated_score_data;
}
-libtextclassifier3::Status DocumentStore::DeleteByNamespace(
- std::string_view name_space) {
- auto namespace_id_or = namespace_mapper_->Get(name_space);
- if (!namespace_id_or.ok()) {
- return absl_ports::Annotate(
- namespace_id_or.status(),
- absl_ports::StrCat("Failed to delete by namespace. namespace: ",
- name_space));
+libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const {
+ auto corpus_scoring_data_or = GetCorpusAssociatedScoreData(corpus_id);
+ if (corpus_scoring_data_or.ok()) {
+ return std::move(corpus_scoring_data_or).ValueOrDie();
+ }
+ CorpusAssociatedScoreData scoring_data;
+ // OUT_OF_RANGE is the StatusCode returned when a corpus id is added to
+ // corpus_score_cache_ for the first time; return a default-constructed
+ // CorpusAssociatedScoreData in that case.
+ if (corpus_scoring_data_or.status().CanonicalCode() ==
+ libtextclassifier3::StatusCode::OUT_OF_RANGE) {
+ return scoring_data;
}
+ return corpus_scoring_data_or.status();
+}
- // Update ground truth first.
- // To delete an entire namespace, we append a tombstone that only contains
- // the deleted bit and the name of the deleted namespace.
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateNamespaceTombstone(name_space)).status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete namespace. namespace = "
- << name_space;
- return status;
+// TODO(b/273826815): Decide on and adopt a consistent pattern for handling
+// NOT_FOUND 'errors' returned by our internal classes.
+std::optional<UsageStore::UsageScores> DocumentStore::GetUsageScores(
+ DocumentId document_id, int64_t current_time_ms) const {
+ std::optional<DocumentFilterData> opt =
+ GetAliveDocumentFilterData(document_id, current_time_ms);
+ if (!opt) {
+ return std::nullopt;
+ }
+ if (document_id >= usage_store_->num_elements()) {
+ return std::nullopt;
}
+ auto usage_scores_or = usage_store_->GetUsageScores(document_id);
+ if (!usage_scores_or.ok()) {
+ ICING_LOG(ERROR) << "Error retrieving usage for " << document_id << ": "
+ << usage_scores_or.status().error_message();
+ return std::nullopt;
+ }
+ return std::move(usage_scores_or).ValueOrDie();
+}
- ICING_ASSIGN_OR_RETURN(bool updated_existing_document,
- UpdateDerivedFilesNamespaceDeleted(name_space));
- if (!updated_existing_document) {
- // Treat the fact that no existing documents had this namespace to be the
- // same as this namespace not existing at all.
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Namespace '", name_space, "' doesn't exist"));
+libtextclassifier3::Status DocumentStore::ReportUsage(
+ const UsageReport& usage_report) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id,
+ GetDocumentId(usage_report.document_namespace(),
+ usage_report.document_uri()));
+ // We can use the internal version here because we got our document_id from
+ // our internal data structures; we would already have returned an error if
+ // the namespace and/or uri were incorrect.
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
+ // Document was probably deleted or expired.
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Couldn't report usage on a nonexistent document: (namespace: '",
+ usage_report.document_namespace(), "', uri: '",
+ usage_report.document_uri(), "')"));
}
- return libtextclassifier3::Status::OK;
+
+ return usage_store_->AddUsageReport(usage_report, document_id);
}
-libtextclassifier3::StatusOr<bool>
-DocumentStore::UpdateDerivedFilesNamespaceDeleted(std::string_view name_space) {
+DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace(
+ std::string_view name_space) {
+ DeleteByGroupResult result;
auto namespace_id_or = namespace_mapper_->Get(name_space);
if (!namespace_id_or.ok()) {
- return namespace_id_or.status();
+ result.status = absl_ports::Annotate(
+ namespace_id_or.status(),
+ absl_ports::StrCat("Failed to find namespace: ", name_space));
+ return result;
}
-
- // Guaranteed to have a NamespaceId now.
NamespaceId namespace_id = namespace_id_or.ValueOrDie();
+ auto num_deleted_or = BatchDelete(namespace_id, kInvalidSchemaTypeId);
+ if (!num_deleted_or.ok()) {
+ result.status = std::move(num_deleted_or).status();
+ return result;
+ }
- // Tracks if there were any existing documents with this namespace that we
- // will mark as deleted.
- bool updated_existing_document = false;
-
- // Traverse FilterCache and delete all docs that match namespace_id
- for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
- ++document_id) {
- // filter_cache_->Get can only fail if document_id is < 0
- // or >= filter_cache_->num_elements. So, this error SHOULD NEVER HAPPEN.
- ICING_ASSIGN_OR_RETURN(const DocumentFilterData* data,
- filter_cache_->Get(document_id));
- if (data->namespace_id() == namespace_id) {
- if (DoesDocumentExist(document_id)) {
- updated_existing_document = true;
- }
-
- // docid_mapper_->Set can only fail if document_id is < 0
- // or >= docid_mapper_->num_elements. So the only possible way to get an
- // error here would be if filter_cache_->num_elements >
- // docid_mapper_->num_elements, which SHOULD NEVER HAPPEN.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- }
+ result.num_docs_deleted = num_deleted_or.ValueOrDie();
+ if (result.num_docs_deleted <= 0) {
+ // Treat the case where no existing documents have this namespace the same
+ // as the namespace not existing at all.
+ result.status = absl_ports::NotFoundError(
+ absl_ports::StrCat("Namespace '", name_space, "' doesn't exist"));
+ return result;
}
- return updated_existing_document;
+ return result;
}
-libtextclassifier3::Status DocumentStore::DeleteBySchemaType(
+DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType(
std::string_view schema_type) {
+ DeleteByGroupResult result;
auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
if (!schema_type_id_or.ok()) {
- return absl_ports::Annotate(
+ result.status = absl_ports::Annotate(
schema_type_id_or.status(),
- absl_ports::StrCat("Failed to delete by schema type. schema_type: ",
+ absl_ports::StrCat("Failed to find schema type. schema_type: ",
schema_type));
+ return result;
}
-
- // Update ground truth first.
- // To delete an entire schema type, we append a tombstone that only contains
- // the deleted bit and the name of the deleted schema type.
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete schema_type. schema_type = "
- << schema_type;
- return status;
- }
-
- // Guaranteed to have a SchemaTypeId now
SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
+ auto num_deleted_or = BatchDelete(kInvalidNamespaceId, schema_type_id);
+ if (!num_deleted_or.ok()) {
+ result.status = std::move(num_deleted_or).status();
+ return result;
+ }
- ICING_RETURN_IF_ERROR(UpdateDerivedFilesSchemaTypeDeleted(schema_type_id));
+ result.num_docs_deleted = num_deleted_or.ValueOrDie();
+ if (result.num_docs_deleted <= 0) {
+ result.status = absl_ports::NotFoundError(absl_ports::StrCat(
+ "No documents found with schema type '", schema_type, "'"));
+ return result;
+ }
- return libtextclassifier3::Status::OK;
+ return result;
}
-libtextclassifier3::Status DocumentStore::UpdateDerivedFilesSchemaTypeDeleted(
- SchemaTypeId schema_type_id) {
- // Traverse FilterCache and delete all docs that match schema_type_id.
+libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
+ NamespaceId namespace_id, SchemaTypeId schema_type_id) {
+ // Tracks the number of documents that matched the given namespace and/or
+ // schema type and were deleted.
+ int num_updated_documents = 0;
+
+ // Traverse FilterCache and delete all docs that match namespace_id and
+ // schema_type_id.
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
++document_id) {
// filter_cache_->Get can only fail if document_id is < 0
// or >= filter_cache_->num_elements. So, this error SHOULD NEVER HAPPEN.
ICING_ASSIGN_OR_RETURN(const DocumentFilterData* data,
filter_cache_->Get(document_id));
- if (data->schema_type_id() == schema_type_id) {
- // docid_mapper_->Set can only fail if document_id is < 0
- // or >= docid_mapper_->num_elements. So the only possible way to get an
- // error here would be if filter_cache_->num_elements >
- // docid_mapper_->num_elements, which SHOULD NEVER HAPPEN.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
+
+ // Check namespace only when the input namespace id is valid.
+ if (namespace_id != kInvalidNamespaceId &&
+ (data->namespace_id() == kInvalidNamespaceId ||
+ data->namespace_id() != namespace_id)) {
+ // The document has already been hard-deleted or isn't from the desired
+ // namespace.
+ continue;
+ }
+
+ // Check schema type only when the input schema type id is valid.
+ if (schema_type_id != kInvalidSchemaTypeId &&
+ (data->schema_type_id() == kInvalidSchemaTypeId ||
+ data->schema_type_id() != schema_type_id)) {
+ // The document has already been hard-deleted or doesn't have the
+ // desired schema type.
+ continue;
+ }
+
+ // The document has the desired namespace and schema type; it either
+ // exists or has expired.
+ libtextclassifier3::Status delete_status =
+ Delete(document_id, current_time_ms);
+ if (absl_ports::IsNotFound(delete_status)) {
+ continue;
+ } else if (!delete_status.ok()) {
+ // Real error, pass up.
+ return delete_status;
}
+ ++num_updated_documents;
}
- return libtextclassifier3::Status::OK;
+ return num_updated_documents;
}
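+
+// Example (descriptive sketch): kInvalidNamespaceId and kInvalidSchemaTypeId
+// act as wildcards in BatchDelete, so the two group deletes above reduce to:
+//
+//   BatchDelete(namespace_id, kInvalidSchemaTypeId);   // DeleteByNamespace
+//   BatchDelete(kInvalidNamespaceId, schema_type_id);  // DeleteBySchemaType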
-libtextclassifier3::Status DocumentStore::PersistToDisk() {
+libtextclassifier3::Status DocumentStore::PersistToDisk(
+ PersistType::Code persist_type) {
+ if (persist_type == PersistType::LITE) {
+ // Only persist the document log.
+ return document_log_->PersistToDisk();
+ }
ICING_RETURN_IF_ERROR(document_log_->PersistToDisk());
ICING_RETURN_IF_ERROR(document_key_mapper_->PersistToDisk());
ICING_RETURN_IF_ERROR(document_id_mapper_->PersistToDisk());
ICING_RETURN_IF_ERROR(score_cache_->PersistToDisk());
ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk());
ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(corpus_score_cache_->PersistToDisk());
// Update the combined checksum and write to header file.
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
@@ -1056,23 +1578,140 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() {
return libtextclassifier3::Status::OK;
}
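+
+// Example (illustrative, assuming a DocumentStore* store): LITE persists only
+// the ground truth (document log), trading durability of derived files for
+// speed; FULL flushes every member and rewrites the header checksum.
+//
+//   ICING_RETURN_IF_ERROR(store->PersistToDisk(PersistType::LITE));  // cheap
+//   ICING_RETURN_IF_ERROR(store->PersistToDisk(PersistType::FULL));  // durable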
-libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const {
- ICING_ASSIGN_OR_RETURN(const int64_t document_log_disk_usage,
- document_log_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_disk_usage,
- document_key_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_disk_usage,
- document_id_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t score_cache_disk_usage,
- score_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_disk_usage,
- filter_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
- namespace_mapper_->GetDiskUsage());
+int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or,
+ int64_t default_value) {
+ return (value_or.ok()) ? value_or.ValueOrDie() : default_value;
+}
- return document_log_disk_usage + document_key_mapper_disk_usage +
- document_id_mapper_disk_usage + score_cache_disk_usage +
- filter_cache_disk_usage + namespace_mapper_disk_usage;
+DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const {
+ DocumentStorageInfoProto storage_info;
+ storage_info.set_document_log_size(
+ GetValueOrDefault(document_log_->GetDiskUsage(), -1));
+ storage_info.set_key_mapper_size(
+ GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1));
+ storage_info.set_document_id_mapper_size(
+ GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1));
+ storage_info.set_score_cache_size(
+ GetValueOrDefault(score_cache_->GetDiskUsage(), -1));
+ storage_info.set_filter_cache_size(
+ GetValueOrDefault(filter_cache_->GetDiskUsage(), -1));
+ storage_info.set_namespace_id_mapper_size(
+ GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_mapper_size(
+ GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_score_cache_size(
+ GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1));
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const {
+ int total_num_alive = 0;
+ int total_num_expired = 0;
+ int total_num_deleted = 0;
+ std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
+ GetNamespaceIdsToNamespaces(namespace_mapper_.get());
+ std::unordered_map<std::string, NamespaceStorageInfoProto>
+ namespace_to_storage_info;
+
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ for (DocumentId document_id = 0;
+ document_id < document_id_mapper_->num_elements(); ++document_id) {
+ // Check if it's deleted first.
+ if (IsDeleted(document_id)) {
+ // We don't have the namespace id of hard deleted documents anymore, so
+ // we can't add to our namespace storage info.
+ ++total_num_deleted;
+ continue;
+ }
+
+ // At this point, the document is either alive or expired, so we can get
+ // namespace info for it.
+ auto filter_data_or = filter_cache_->Get(document_id);
+ if (!filter_data_or.ok()) {
+ ICING_VLOG(1) << "Error trying to get filter data for document store "
+ "storage info counts.";
+ continue;
+ }
+ const DocumentFilterData* filter_data = filter_data_or.ValueOrDie();
+ auto itr = namespace_id_to_namespace.find(filter_data->namespace_id());
+ if (itr == namespace_id_to_namespace.end()) {
+ ICING_VLOG(1) << "Error trying to find namespace for document store "
+ "storage info counts.";
+ continue;
+ }
+ const std::string& name_space = itr->second;
+
+ // Always set the namespace; if the NamespaceStorageInfoProto didn't exist
+ // before, we'll get back a default instance of it.
+ NamespaceStorageInfoProto& namespace_storage_info =
+ namespace_to_storage_info[name_space];
+ namespace_storage_info.set_namespace_(name_space);
+
+ // Get usage scores
+ auto usage_scores_or = usage_store_->GetUsageScores(document_id);
+ if (!usage_scores_or.ok()) {
+ ICING_VLOG(1) << "Error trying to get usage scores for document store "
+ "storage info counts.";
+ continue;
+ }
+ UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie();
+
+ // Update our stats
+ if (!GetNonExpiredDocumentFilterData(document_id, current_time_ms)) {
+ ++total_num_expired;
+ namespace_storage_info.set_num_expired_documents(
+ namespace_storage_info.num_expired_documents() + 1);
+ if (usage_scores.usage_type1_count > 0) {
+ namespace_storage_info.set_num_expired_documents_usage_type1(
+ namespace_storage_info.num_expired_documents_usage_type1() + 1);
+ }
+ if (usage_scores.usage_type2_count > 0) {
+ namespace_storage_info.set_num_expired_documents_usage_type2(
+ namespace_storage_info.num_expired_documents_usage_type2() + 1);
+ }
+ if (usage_scores.usage_type3_count > 0) {
+ namespace_storage_info.set_num_expired_documents_usage_type3(
+ namespace_storage_info.num_expired_documents_usage_type3() + 1);
+ }
+ } else {
+ ++total_num_alive;
+ namespace_storage_info.set_num_alive_documents(
+ namespace_storage_info.num_alive_documents() + 1);
+ if (usage_scores.usage_type1_count > 0) {
+ namespace_storage_info.set_num_alive_documents_usage_type1(
+ namespace_storage_info.num_alive_documents_usage_type1() + 1);
+ }
+ if (usage_scores.usage_type2_count > 0) {
+ namespace_storage_info.set_num_alive_documents_usage_type2(
+ namespace_storage_info.num_alive_documents_usage_type2() + 1);
+ }
+ if (usage_scores.usage_type3_count > 0) {
+ namespace_storage_info.set_num_alive_documents_usage_type3(
+ namespace_storage_info.num_alive_documents_usage_type3() + 1);
+ }
+ }
+ }
+
+ for (auto& itr : namespace_to_storage_info) {
+ storage_info.mutable_namespace_storage_info()->Add(std::move(itr.second));
+ }
+ storage_info.set_num_alive_documents(total_num_alive);
+ storage_info.set_num_deleted_documents(total_num_deleted);
+ storage_info.set_num_expired_documents(total_num_expired);
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::GetStorageInfo() const {
+ DocumentStorageInfoProto storage_info = GetMemberStorageInfo();
+ int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_document_store_size(directory_size);
+ } else {
+ storage_info.set_document_store_size(-1);
+ }
+ storage_info.set_num_namespaces(namespace_mapper_->num_keys());
+ return CalculateDocumentStatusCounts(std::move(storage_info));
}
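+
+// Note (descriptive): member sizes that cannot be computed are reported as -1
+// via GetValueOrDefault above rather than failing the whole storage-info
+// call; the same sentinel is used for document_store_size on a bad file size.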
libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
@@ -1082,6 +1721,7 @@ libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
document_validator_.UpdateSchemaStore(schema_store);
int size = document_id_mapper_->num_elements();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id);
if (absl_ports::IsNotFound(document_or.status())) {
@@ -1103,12 +1743,16 @@ libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
// Update the SchemaTypeId for this entry
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
schema_store_->GetSchemaTypeId(document.schema()));
- filter_cache_->mutable_array()[document_id].set_schema_type_id(
- schema_type_id);
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<DocumentFilterData>::MutableView
+ doc_filter_data_view,
+ filter_cache_->GetMutable(document_id));
+ doc_filter_data_view.Get().set_schema_type_id(schema_type_id);
} else {
// Document is no longer valid with the new SchemaStore. Mark as
// deleted
- auto delete_status = Delete(document.namespace_(), document.uri());
+ auto delete_status =
+ Delete(document.namespace_(), document.uri(), current_time_ms);
if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
// Real error, pass up
return delete_status;
@@ -1131,50 +1775,20 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
schema_store_ = schema_store;
document_validator_.UpdateSchemaStore(schema_store);
- // Append a tombstone for each deleted schema type. This way, we don't have
- // to read out each document, check if the schema type has been deleted, and
- // append a tombstone per-document.
- for (const auto& schema_type :
- set_schema_result.schema_types_deleted_by_name) {
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete schema_type. schema_type = "
- << schema_type;
- return status;
- }
- }
-
int size = document_id_mapper_->num_elements();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = 0; document_id < size; document_id++) {
- auto exists_or = DoesDocumentExistAndGetFileOffset(document_id);
- if (absl_ports::IsNotFound(exists_or.status())) {
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
// Skip nonexistent documents
continue;
- } else if (!exists_or.ok()) {
- // Real error, pass up
- return absl_ports::Annotate(
- exists_or.status(),
- IcingStringUtil::StringPrintf("Failed to retrieve DocumentId %d",
- document_id));
}
// Guaranteed that the document exists now.
ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
filter_cache_->Get(document_id));
- if (set_schema_result.schema_types_deleted_by_id.count(
- filter_data->schema_type_id()) != 0) {
- // We already created a tombstone for this deleted type. Just update the
- // derived files now.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- continue;
- }
+ bool delete_document = set_schema_result.schema_types_deleted_by_id.count(
+ filter_data->schema_type_id()) != 0;
// Check if we need to update the FilterCache entry for this document. It
// may have been assigned a different SchemaTypeId in the new SchemaStore.
@@ -1195,20 +1809,23 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
ICING_ASSIGN_OR_RETURN(
SchemaTypeId schema_type_id,
schema_store_->GetSchemaTypeId(document.schema()));
- filter_cache_->mutable_array()[document_id].set_schema_type_id(
- schema_type_id);
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<DocumentFilterData>::MutableView
+ doc_filter_data_view,
+ filter_cache_->GetMutable(document_id));
+ doc_filter_data_view.Get().set_schema_type_id(schema_type_id);
}
-
if (revalidate_document) {
- if (!document_validator_.Validate(document).ok()) {
- // Document is no longer valid with the new SchemaStore. Mark as
- // deleted
- auto delete_status = Delete(document.namespace_(), document.uri());
- if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
- // Real error, pass up
- return delete_status;
- }
- }
+ delete_document = !document_validator_.Validate(document).ok();
+ }
+ }
+
+ if (delete_document) {
+ // Document is no longer valid with the new SchemaStore. Mark as deleted
+ auto delete_status = Delete(document_id, current_time_ms);
+ if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
+ // Real error, pass up
+ return delete_status;
}
}
}
@@ -1221,24 +1838,44 @@ libtextclassifier3::Status DocumentStore::Optimize() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status DocumentStore::OptimizeInto(
- const std::string& new_directory) {
+libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
+DocumentStore::OptimizeInto(const std::string& new_directory,
+ const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats) const {
// Validates directory
if (new_directory == base_dir_) {
return absl_ports::InvalidArgumentError(
"New directory is the same as the current one.");
}
- ICING_ASSIGN_OR_RETURN(auto new_doc_store,
- DocumentStore::Create(filesystem_, new_directory,
- &clock_, schema_store_));
+ ICING_ASSIGN_OR_RETURN(
+ auto doc_store_create_result,
+ DocumentStore::Create(filesystem_, new_directory, &clock_, schema_store_,
+ /*force_recovery_and_revalidate_documents=*/false,
+ namespace_id_fingerprint_, pre_mapping_fbv_,
+ use_persistent_hash_map_, compression_level_,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> new_doc_store =
+ std::move(doc_store_create_result.document_store);
// Writes all valid docs into new document store (new directory)
- int size = document_id_mapper_->num_elements();
- for (DocumentId document_id = 0; document_id < size; document_id++) {
- auto document_or = Get(document_id);
+ int document_cnt = document_id_mapper_->num_elements();
+ int num_deleted_documents = 0;
+ int num_expired_documents = 0;
+ UsageStore::UsageScores default_usage;
+
+ OptimizeResult result;
+ result.document_id_old_to_new.resize(document_cnt, kInvalidDocumentId);
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ for (DocumentId document_id = 0; document_id < document_cnt; document_id++) {
+ auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
- // Skip nonexistent documents
+ if (IsDeleted(document_id)) {
+ ++num_deleted_documents;
+ } else if (!GetNonExpiredDocumentFilterData(document_id,
+ current_time_ms)) {
+ ++num_expired_documents;
+ }
continue;
} else if (!document_or.ok()) {
// Real error, pass up
@@ -1249,20 +1886,104 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
}
// Guaranteed to have a document now.
- DocumentProto document_to_keep = document_or.ValueOrDie();
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- new_doc_store->Put(std::move(document_to_keep)).status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ DocumentProto document_to_keep = std::move(document_or).ValueOrDie();
+
+ libtextclassifier3::StatusOr<DocumentId> new_document_id_or;
+ if (document_to_keep.internal_fields().length_in_tokens() == 0) {
+ auto tokenized_document_or = TokenizedDocument::Create(
+ schema_store_, lang_segmenter, document_to_keep);
+ if (!tokenized_document_or.ok()) {
+ return absl_ports::Annotate(
+ tokenized_document_or.status(),
+ IcingStringUtil::StringPrintf(
+ "Failed to tokenize Document for DocumentId %d", document_id));
+ }
+ TokenizedDocument tokenized_document(
+ std::move(tokenized_document_or).ValueOrDie());
+ new_document_id_or = new_doc_store->Put(
+ std::move(document_to_keep), tokenized_document.num_string_tokens());
+ } else {
+ // TODO(b/144458732): Implement a more robust version of
+ // TC_ASSIGN_OR_RETURN that can support error logging.
+ new_document_id_or =
+ new_doc_store->InternalPut(std::move(document_to_keep));
+ }
+ if (!new_document_id_or.ok()) {
+ ICING_LOG(ERROR) << new_document_id_or.status().error_message()
<< "Failed to write into new document store";
- return status;
+ return new_document_id_or.status();
+ }
+
+ result.document_id_old_to_new[document_id] =
+ new_document_id_or.ValueOrDie();
+
+ // Copy over usage scores.
+ ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
+ usage_store_->GetUsageScores(document_id));
+ // Only copy the usage scores over if they differ from the default (no
+ // usage). There's no need to possibly allocate storage when there's
+ // nothing interesting to store.
+ if (!(usage_scores == default_usage)) {
+ DocumentId new_document_id = new_document_id_or.ValueOrDie();
+ ICING_RETURN_IF_ERROR(
+ new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
}
- ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
- return libtextclassifier3::Status::OK;
+ // Construct namespace_id_old_to_new
+ int namespace_cnt = namespace_mapper_->num_keys();
+ std::unordered_map<NamespaceId, std::string> old_namespaces =
+ GetNamespaceIdsToNamespaces(namespace_mapper_.get());
+ if (namespace_cnt != old_namespaces.size()) {
+ // This really shouldn't happen. If it does, then:
+ // - It won't block DocumentStore optimization, so don't return an error here.
+ // - Instead, write a warning log here and hint the caller to rebuild the index.
+ ICING_LOG(WARNING) << "Unexpected old namespace count " << namespace_cnt
+ << " vs " << old_namespaces.size();
+ result.should_rebuild_index = true;
+ } else {
+ result.namespace_id_old_to_new.resize(namespace_cnt, kInvalidNamespaceId);
+ for (const auto& [old_namespace_id, ns] : old_namespaces) {
+ if (old_namespace_id >= result.namespace_id_old_to_new.size()) {
+ // This really shouldn't happen. If it does, then:
+ // - It won't block DocumentStore optimization, so don't return an error
+ // here.
+ // - Instead, write a warning log here and hint the caller to rebuild the
+ // index.
+ ICING_LOG(WARNING) << "Found unexpected namespace id "
+ << old_namespace_id << ". Should be in range 0 to "
+ << result.namespace_id_old_to_new.size()
+ << " (exclusive).";
+ result.namespace_id_old_to_new.clear();
+ result.should_rebuild_index = true;
+ break;
+ }
+
+ auto new_namespace_id_or = new_doc_store->namespace_mapper_->Get(ns);
+ if (!new_namespace_id_or.ok()) {
+ if (absl_ports::IsNotFound(new_namespace_id_or.status())) {
+ continue;
+ }
+ // Real error, return it.
+ return std::move(new_namespace_id_or).status();
+ }
+
+ NamespaceId new_namespace_id = new_namespace_id_or.ValueOrDie();
+ // Safe to assign via operator[] given that we've checked the range above.
+ result.namespace_id_old_to_new[old_namespace_id] = new_namespace_id;
+ }
+ }
+
+ if (stats != nullptr) {
+ stats->set_num_original_documents(document_cnt);
+ stats->set_num_deleted_documents(num_deleted_documents);
+ stats->set_num_expired_documents(num_expired_documents);
+ stats->set_num_original_namespaces(namespace_cnt);
+ stats->set_num_deleted_namespaces(
+ namespace_cnt - new_doc_store->namespace_mapper_->num_keys());
+ }
+ ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL));
+ return result;
}
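+
+// Example (hypothetical): with old documents {0: alive, 1: deleted, 2: alive},
+// OptimizeInto would return document_id_old_to_new = {0, kInvalidDocumentId,
+// 1}, letting callers remap index hits instead of rebuilding from scratch
+// (unless should_rebuild_index was set).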
libtextclassifier3::StatusOr<DocumentStore::OptimizeInfo>
@@ -1271,9 +1992,10 @@ DocumentStore::GetOptimizeInfo() const {
// Figure out our ratio of optimizable/total docs.
int32_t num_documents = document_id_mapper_->num_elements();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
for (DocumentId document_id = kMinDocumentId; document_id < num_documents;
++document_id) {
- if (!DoesDocumentExist(document_id)) {
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
++optimize_info.optimizable_docs;
}
@@ -1298,26 +2020,39 @@ DocumentStore::GetOptimizeInfo() const {
score_cache_->GetElementsFileSize());
ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_file_size,
filter_cache_->GetElementsFileSize());
+ ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_file_size,
+ corpus_score_cache_->GetElementsFileSize());
+
+ // Usage store might be sparse, but we'll still use file size for more
+ // accurate counting.
+ ICING_ASSIGN_OR_RETURN(const int64_t usage_store_file_size,
+ usage_store_->GetElementsFileSize());
- // We use a combined disk usage and file size for the KeyMapper because it's
- // backed by a trie, which has some sparse property bitmaps.
+ // We use a combined disk usage and file size for the DynamicTrieKeyMapper
+ // because it's backed by a trie, which has some sparse property bitmaps.
ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size,
document_key_mapper_->GetElementsSize());
- // We don't include the namespace mapper because it's not clear if we could
- // recover any space even if Optimize were called. Deleting 100s of documents
- // could still leave a few documents of a namespace, and then there would be
- // no change.
+ // We don't include the namespace_mapper or the corpus_mapper because it's
+ // not clear if we could recover any space even if Optimize were called.
+ // Deleting 100s of documents could still leave a few documents of a
+ // namespace, and then there would be no change.
int64_t total_size = document_log_file_size + document_key_mapper_size +
document_id_mapper_file_size + score_cache_file_size +
- filter_cache_file_size;
+ filter_cache_file_size + corpus_score_cache_file_size +
+ usage_store_file_size;
optimize_info.estimated_optimizable_bytes =
total_size * optimize_info.optimizable_docs / optimize_info.total_docs;
return optimize_info;
}
+libtextclassifier3::Status DocumentStore::UpdateCorpusAssociatedScoreCache(
+ CorpusId corpus_id, const CorpusAssociatedScoreData& score_data) {
+ return corpus_score_cache_->Set(corpus_id, score_data);
+}
+
libtextclassifier3::Status DocumentStore::UpdateDocumentAssociatedScoreCache(
DocumentId document_id, const DocumentAssociatedScoreData& score_data) {
return score_cache_->Set(document_id, score_data);
@@ -1328,5 +2063,92 @@ libtextclassifier3::Status DocumentStore::UpdateFilterCache(
return filter_cache_->Set(document_id, filter_data);
}
+libtextclassifier3::Status DocumentStore::ClearDerivedData(
+ DocumentId document_id) {
+ // We intentionally leave the data in key_mapper_ because locating that data
+ // requires fetching namespace and uri. Leaving data in key_mapper_ should
+ // be fine because the data is hashed.
+
+ ICING_RETURN_IF_ERROR(document_id_mapper_->Set(document_id, kDocDeletedFlag));
+
+ // Resets the score cache entry
+ ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+ document_id, DocumentAssociatedScoreData(kInvalidCorpusId,
+ /*document_score=*/-1,
+ /*creation_timestamp_ms=*/-1,
+ /*length_in_tokens=*/0)));
+
+ // Resets the filter cache entry
+ ICING_RETURN_IF_ERROR(UpdateFilterCache(
+ document_id, DocumentFilterData(kInvalidNamespaceId, kInvalidSchemaTypeId,
+ /*expiration_timestamp_ms=*/-1)));
+
+ // Clears the usage scores.
+ return usage_store_->DeleteUsageScores(document_id);
+}
+
+libtextclassifier3::Status DocumentStore::SetUsageScores(
+ DocumentId document_id, const UsageStore::UsageScores& usage_scores) {
+ return usage_store_->SetUsageScores(document_id, usage_scores);
+}
+
+libtextclassifier3::StatusOr<
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
+DocumentStore::CollectCorpusInfo() const {
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo> corpus_info;
+ libtextclassifier3::StatusOr<const SchemaProto*> schema_proto_or =
+ schema_store_->GetSchema();
+ if (!schema_proto_or.ok()) {
+ return corpus_info;
+ }
+ // Maps from CorpusId to the corresponding protocol buffer in the result.
+ std::unordered_map<CorpusId, DocumentDebugInfoProto::CorpusInfo*> info_map;
+ std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
+ GetNamespaceIdsToNamespaces(namespace_mapper_.get());
+ const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
+ int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
+ for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
+ ++document_id) {
+ if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
+ continue;
+ }
+ ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
+ filter_cache_->Get(document_id));
+ ICING_ASSIGN_OR_RETURN(const DocumentAssociatedScoreData* score_data,
+ score_cache_->Get(document_id));
+ const std::string& name_space =
+ namespace_id_to_namespace[filter_data->namespace_id()];
+ const std::string& schema =
+ schema_proto->types()[filter_data->schema_type_id()].schema_type();
+ auto iter = info_map.find(score_data->corpus_id());
+ if (iter == info_map.end()) {
+ DocumentDebugInfoProto::CorpusInfo* entry = corpus_info.Add();
+ entry->set_namespace_(name_space);
+ entry->set_schema(schema);
+ iter = info_map.insert({score_data->corpus_id(), entry}).first;
+ }
+ iter->second->set_total_documents(iter->second->total_documents() + 1);
+ iter->second->set_total_token(iter->second->total_token() +
+ score_data->length_in_tokens());
+ }
+ return corpus_info;
+}
+
+libtextclassifier3::StatusOr<DocumentDebugInfoProto>
+DocumentStore::GetDebugInfo(int verbosity) const {
+ DocumentDebugInfoProto debug_info;
+ *debug_info.mutable_document_storage_info() = GetStorageInfo();
+ ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum());
+ debug_info.set_crc(crc.Get());
+ if (verbosity > 0) {
+ ICING_ASSIGN_OR_RETURN(
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>
+ corpus_info,
+ CollectCorpusInfo());
+ *debug_info.mutable_corpus_info() = std::move(corpus_info);
+ }
+ return debug_info;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 3f4b72f..c228e8b 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -26,17 +26,32 @@
#include "icing/file/file-backed-proto-log.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/usage.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-associated-scoring-data.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/usage-store.h"
+#include "icing/tokenization/language-segmenter.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/document-validator.h"
+#include "icing/util/fingerprint-util.h"
namespace icing {
namespace lib {
@@ -45,13 +60,19 @@ namespace lib {
class DocumentStore {
public:
struct Header {
- static constexpr int32_t kMagic = 0x746f7265;
+ static int32_t GetCurrentMagic(bool namespace_id_fingerprint) {
+ return namespace_id_fingerprint ? kNewMagic : kOldMagic;
+ }
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
// Checksum of the DocumentStore's sub-component's checksums.
uint32_t checksum;
+
+ private:
+ static constexpr int32_t kOldMagic = 0x746f7265;
+ static constexpr int32_t kNewMagic = 0x1b99c8b0;
};
struct OptimizeInfo {
@@ -68,6 +89,31 @@ class DocumentStore {
int32_t optimizable_docs = 0;
};
+ struct DeleteByGroupResult {
+ // Status representing whether or not the operation succeeded. See the
+ // comments above the function that returns this result for the possible
+ // statuses.
+ libtextclassifier3::Status status;
+
+ int num_docs_deleted = 0;
+ };
+
+ struct CreateResult {
+ // A successfully initialized document store.
+ std::unique_ptr<DocumentStore> document_store;
+
+ // The data status after initializing from a previous state. Data loss can
+ // happen if the file is corrupted or some previously added data was
+ // unpersisted. This may be used to signal that any derived data off of the
+ // document store may need to be regenerated.
+ DataLoss data_loss;
+
+ // A boolean flag indicating if derived files of the document store have
+ // been regenerated or not. This is usually a signal for callers to detect
+ // if any id assignment has changed (e.g. NamespaceId).
+ bool derived_files_regenerated;
+ };
+
// Not copyable
DocumentStore(const DocumentStore&) = delete;
DocumentStore& operator=(const DocumentStore&) = delete;
@@ -80,53 +126,89 @@ class DocumentStore {
// previously initialized with this directory, it will reload the files saved
// by the last instance.
//
- // Does not take any ownership, and all pointers must refer to valid objects
- // that outlive the one constructed.
+ // force_recovery_and_revalidate_documents=true will pre-emptively throw out
+ // the derived files and validate each document while recreating them. This
+ // can be used to indicate that the schema (and type ids) may have changed and
+ // those changes might not have been applied to the document store.
+ //
+ // If initialize_stats is present, the fields related to DocumentStore will be
+ // populated.
+ //
+ // Does not take any ownership, and all pointers except initialize_stats must
+ // refer to valid objects that outlive the one constructed.
//
// TODO(cassiewang): Consider returning a status indicating that derived files
// were regenerated. This may be helpful in logs.
//
// Returns:
- // A DocumentStore on success
+ // A DocumentStore::CreateResult on success
// FAILED_PRECONDITION on any null pointer input
// INTERNAL_ERROR on IO error
- static libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>> Create(
+ static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, const SchemaStore* schema_store);
+ const Clock* clock, const SchemaStore* schema_store,
+ bool force_recovery_and_revalidate_documents,
+ bool namespace_id_fingerprint, bool pre_mapping_fbv,
+ bool use_persistent_hash_map, int32_t compression_level,
+ InitializeStatsProto* initialize_stats);
+
+ // Discards all derived data in the document store.
+ //
+ // Returns:
+ // OK on success or nothing to discard
+ // INTERNAL_ERROR on any I/O errors
+ static libtextclassifier3::Status DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
// empty, then kInvalidDocumentId is returned. This does not filter out
- // DocumentIds of deleted documents.
- const DocumentId last_added_document_id() const {
+ // DocumentIds of deleted or expired documents.
+ DocumentId last_added_document_id() const {
if (document_id_mapper_->num_elements() == 0) {
return kInvalidDocumentId;
}
return document_id_mapper_->num_elements() - 1;
}
+ // Returns the number of documents. The result does not filter out DocumentIds
+ // of deleted or expired documents.
+ int num_documents() const { return document_id_mapper_->num_elements(); }
+
// Puts the document into document store.
//
+ // If put_document_stats is present, the fields related to DocumentStore will
+ // be populated.
+ //
// Returns:
// A newly generated document id on success
+ // RESOURCE_EXHAUSTED if it exceeds the maximum number of allowed documents
// FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if the schema_type or a property config of the document doesn't
// exist in schema
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<DocumentId> Put(const DocumentProto& document);
- libtextclassifier3::StatusOr<DocumentId> Put(DocumentProto&& document);
+ libtextclassifier3::StatusOr<DocumentId> Put(
+ const DocumentProto& document, int32_t num_tokens = 0,
+ PutDocumentStatsProto* put_document_stats = nullptr);
+ libtextclassifier3::StatusOr<DocumentId> Put(
+ DocumentProto&& document, int32_t num_tokens = 0,
+ PutDocumentStatsProto* put_document_stats = nullptr);
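+
+ // Example (hypothetical caller, a sketch only):
+ //
+ //   PutDocumentStatsProto stats;
+ //   ICING_ASSIGN_OR_RETURN(
+ //       DocumentId id,
+ //       doc_store->Put(std::move(document), /*num_tokens=*/42, &stats));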
// Finds and returns the document identified by the given key (namespace +
- // uri)
+ // uri). If 'clear_internal_fields' is true, document-level data that's
+ // generated internally by DocumentStore is cleared.
//
// Returns:
// The document found on success
// NOT_FOUND if the key doesn't exist or document has been deleted
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<DocumentProto> Get(std::string_view name_space,
- std::string_view uri) const;
+ libtextclassifier3::StatusOr<DocumentProto> Get(
+ std::string_view name_space, std::string_view uri,
+ bool clear_internal_fields = true) const;
- // Finds and returns the document identified by the given document id
+ // Finds and returns the document identified by the given document id. If
+ // 'clear_internal_fields' is true, document level data that's generated
+ // internally by DocumentStore is cleared.
//
// Returns:
// The document found on success
@@ -134,30 +216,42 @@ class DocumentStore {
// maximum value
// NOT_FOUND if the document doesn't exist or has been deleted
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<DocumentProto> Get(DocumentId document_id) const;
+ libtextclassifier3::StatusOr<DocumentProto> Get(
+ DocumentId document_id, bool clear_internal_fields = true) const;
// Returns all namespaces which have at least 1 active document (not deleted
// or expired). Order of namespaces is undefined.
std::vector<std::string> GetAllNamespaces() const;
- // Check if a document exists. Existence means it hasn't been deleted and it
- // hasn't expired yet.
+ // Deletes the document identified by the given namespace and uri. The
+ // document proto will be erased immediately.
+ //
+ // NOTE:
+ // Space is not reclaimed for deleted documents until Optimize() is
+ // called.
//
// Returns:
- // boolean whether a document exists or not
- bool DoesDocumentExist(DocumentId document_id) const;
+ // OK on success
+ // NOT_FOUND if no document exists with namespace, uri
+ // INTERNAL_ERROR on IO error
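+  //
+  // e.g. (illustrative, mirroring the benchmark below):
+  //   doc_store->Delete("namespace", "uri",
+  //                     clock.GetSystemTimeMilliseconds());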
+ libtextclassifier3::Status Delete(std::string_view name_space,
+ std::string_view uri,
+ int64_t current_time_ms);
- // Deletes the document identified by the given namespace and uri
+ // Deletes the document identified by the given document_id. The document
+ // proto will be erased immediately.
//
- // NOTE: Space is not reclaimed for deleted documents until Optimize() is
- // called.
+ // NOTE:
+ // Space is not reclaimed for deleted documents until Optimize() is
+ // called.
//
// Returns:
// OK on success
- // NOT_FOUND if no document exists with namespace, uri
+ // NOT_FOUND if the document doesn't exist (i.e. deleted or expired)
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status Delete(std::string_view name_space,
- std::string_view uri);
+ // INVALID_ARGUMENT if document_id is invalid.
+ libtextclassifier3::Status Delete(DocumentId document_id,
+ int64_t current_time_ms);
// Returns the NamespaceId of the string namespace
//
@@ -168,74 +262,163 @@ class DocumentStore {
libtextclassifier3::StatusOr<NamespaceId> GetNamespaceId(
std::string_view name_space) const;
+ // Helper method to find a DocumentId that is associated with the given
+ // namespace and uri.
+ //
+  // NOTE: The DocumentId may refer to an invalid document (deleted
+ // or expired). Callers can call DoesDocumentExist(document_id) to ensure it
+ // refers to a valid Document.
+ //
+ // Returns:
+ // A DocumentId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
+ std::string_view name_space, std::string_view uri) const;
+
+ // Helper method to find a DocumentId that is associated with the given
+ // NamespaceFingerprintIdentifier.
+ //
+  // NOTE: The DocumentId may refer to an invalid document (deleted
+ // or expired). Callers can call DoesDocumentExist(document_id) to ensure it
+ // refers to a valid Document.
+ //
+ // Returns:
+ // A DocumentId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
+ const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier)
+ const;
+
+ // Returns the CorpusId associated with the given namespace and schema.
+ //
+ // Returns:
+ // A CorpusId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<CorpusId> GetCorpusId(
+ const std::string_view name_space, const std::string_view schema) const;
+
+ // Returns the ResultGroupingEntryId associated with the given namespace
+ // and schema.
+ //
+ // NOTE: ResultGroupingEntryIds that are generated by calls with different
+ // ResultGroupingTypes should not be compared. Returned ResultGroupingEntryIds
+  // are only guaranteed to be unique within their own ResultGroupingType.
+ //
+ // Returns:
+ // A ResultGroupingEntryId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int32_t> GetResultGroupingEntryId(
+ ResultSpecProto::ResultGroupingType result_group_type,
+ const std::string_view name_space, const std::string_view schema) const;
+
+  // Returns the ResultGroupingEntryId associated with the given NamespaceId
+  // and SchemaTypeId.
+ //
+ // NOTE: ResultGroupingEntryIds that are generated by calls with different
+ // ResultGroupingTypes should not be compared. Returned ResultGroupingEntryIds
+  // are only guaranteed to be unique within their own ResultGroupingType.
+ //
+ // Returns:
+ // A ResultGroupingEntryId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int32_t> GetResultGroupingEntryId(
+ ResultSpecProto::ResultGroupingType result_group_type,
+ const NamespaceId namespace_id, const SchemaTypeId schema_type_id) const;
+
// Returns the DocumentAssociatedScoreData of the document specified by the
// DocumentId.
//
- // NOTE: This does not check if the document exists and will return the
- // DocumentFilterData of the document even if it has been deleted. Users
- // should check DoesDocumentExist(document_id) if they only want existing
- // documents' DocumentFilterData.
- //
// Returns:
// DocumentAssociatedScoreData on success
- // OUT_OF_RANGE if document_id is negative or exceeds previously seen
- // DocumentIds
+ // NOT_FOUND if the document or the score data is not found
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
GetDocumentAssociatedScoreData(DocumentId document_id) const;
- // Returns the DocumentFilterData of the document specified by the DocumentId.
+ // Returns the CorpusAssociatedScoreData of the corpus specified by the
+ // corpus_id.
//
- // NOTE: This does not check if the document exists and will return the
- // DocumentFilterData of the document even if it has been deleted. Users
- // should check DoesDocumentExist(document_id) if they only want existing
- // documents' DocumentFilterData.
+ // NOTE: This does not check if the corpus exists and will return the
+ // CorpusAssociatedScoreData of the corpus even if all documents belonging to
+ // that corpus have been deleted.
//
// Returns:
- // DocumentFilterData on success
- // OUT_OF_RANGE if document_id is negative or exceeds previously seen
- // DocumentIds
- libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData(
- DocumentId document_id) const;
+ // CorpusAssociatedScoreData on success
+ // OUT_OF_RANGE if corpus_id is negative or exceeds previously seen
+ // CorpusIds
+ libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+ GetCorpusAssociatedScoreData(CorpusId corpus_id) const;
- // Deletes all documents belonging to the given namespace.
+  // Gets the document filter data if a document exists. Otherwise, returns an
+  // empty optional.
//
- // NOTE: Space is not reclaimed for deleted documents until Optimize() is
- // called.
+ // Existence means it hasn't been deleted and it hasn't expired yet.
+ //
+ // Returns:
+  //   A DocumentFilterData if the given document exists.
+  //   std::nullopt if the given document doesn't exist.
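+  //
+  // e.g. (illustrative):
+  //   if (doc_store->GetAliveDocumentFilterData(
+  //           document_id, clock.GetSystemTimeMilliseconds())) {
+  //     // The document is alive (not deleted and not expired).
+  //   }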
+ std::optional<DocumentFilterData> GetAliveDocumentFilterData(
+ DocumentId document_id, int64_t current_time_ms) const;
+
+ // Gets the usage scores of a document.
+ //
+ // Returns:
+ // UsageScores on success
+ // nullopt if there are no usage scores stored for the requested docid.
+ std::optional<UsageStore::UsageScores> GetUsageScores(
+ DocumentId document_id, int64_t current_time_ms) const;
+
+ // Reports usage. The corresponding usage scores of the specified document in
+ // the report will be updated.
+ //
+ // Returns:
+ // OK on success
+  //   NOT_FOUND if the [namespace + uri] key in the report doesn't exist
+ // INTERNAL_ERROR on I/O errors.
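+  //
+  // A UsageReport is a plain proto; an illustrative sketch (field names as
+  // used by the tests below, USAGE_TYPE1 assumed from usage.proto):
+  //
+  //   UsageReport usage_report;
+  //   usage_report.set_document_namespace("namespace");
+  //   usage_report.set_document_uri("uri");
+  //   usage_report.set_usage_timestamp_ms(clock.GetSystemTimeMilliseconds());
+  //   usage_report.set_usage_type(UsageReport::USAGE_TYPE1);
+  //   ICING_RETURN_IF_ERROR(doc_store->ReportUsage(usage_report));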
+ libtextclassifier3::Status ReportUsage(const UsageReport& usage_report);
+
+ // Deletes all documents belonging to the given namespace. The documents will
+ // be erased immediately.
+ //
+ // NOTE:
+ // Space is not reclaimed for deleted documents until Optimize() is
+ // called.
//
// Returns:
// OK on success
// NOT_FOUND if namespace doesn't exist
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status DeleteByNamespace(std::string_view name_space);
+ DeleteByGroupResult DeleteByNamespace(std::string_view name_space);
- // Deletes all documents belonging to the given schema type
+ // Deletes all documents belonging to the given schema type. The documents
+ // will be erased immediately.
//
- // NOTE: Space is not reclaimed for deleted documents until Optimize() is
- // called.
+ // NOTE:
+ // Space is not reclaimed for deleted documents until Optimize() is
+ // called.
//
// Returns:
// OK on success
// NOT_FOUND if schema_type doesn't exist
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status DeleteBySchemaType(std::string_view schema_type);
+ DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type);
// Syncs all the data and metadata changes to disk.
//
// Returns:
// OK on success
// INTERNAL on I/O error
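+  //
+  // e.g. doc_store->PersistToDisk(PersistType::FULL) for a full sync, or
+  // PersistType::LITE for a cheaper sync (both variants appear in the
+  // benchmarks below).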
- libtextclassifier3::Status PersistToDisk();
+ libtextclassifier3::Status PersistToDisk(PersistType::Code persist_type);
- // Calculates and returns the disk usage in bytes. Rounds up to the nearest
- // block size.
+ // Calculates the StorageInfo for the Document Store.
//
- // Returns:
- // Disk usage on success
- // INTERNAL_ERROR on IO error
- //
- // TODO(samzheng): consider returning a struct which has the breakdown of each
- // component.
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ DocumentStorageInfoProto GetStorageInfo() const;
// Update any derived data off of the SchemaStore with the new SchemaStore.
// This may include pointers, SchemaTypeIds, etc.
@@ -277,20 +460,39 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::Status Optimize();
+ struct OptimizeResult {
+ // A vector that maps old document id to new document id.
+ std::vector<DocumentId> document_id_old_to_new;
+
+ // A vector that maps old namespace id to new namespace id. Will be empty if
+ // should_rebuild_index is set to true.
+ std::vector<NamespaceId> namespace_id_old_to_new;
+
+    // A boolean flag that hints to the caller (usually IcingSearchEngine)
+    // whether it should rebuild the index instead of adopting the id changes
+    // via the two vectors above. It will be set to true if any id
+    // inconsistency is found.
+ bool should_rebuild_index = false;
+ };
// Copy data from current base directory into a new directory. Any outdated or
- // deleted data won't be copied. During the process, document ids will be
- // reassigned so any files / classes that are based on old document ids may be
- // outdated.
+ // deleted data won't be copied. During the process, document/namespace ids
+ // will be reassigned so any files / classes that are based on old
+ // document/namespace ids may be outdated.
+ //
+ // stats will be set if non-null.
//
// NOTE: The tasks in this method are too expensive to be executed in
// real-time. The caller should decide how frequently and when to call this
// method based on device usage.
//
// Returns:
- // OK on success
+ // OptimizeResult which contains a vector mapping from old document id to
+ // new document id and another vector mapping from old namespace id to new
+ // namespace id, on success
// INVALID_ARGUMENT if new_directory is same as current base directory
// INTERNAL_ERROR on IO error
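+  //
+  // Illustrative sketch of handling the result (see OptimizeResult above):
+  //
+  //   ICING_ASSIGN_OR_RETURN(
+  //       OptimizeResult result,
+  //       doc_store->OptimizeInto(new_directory, lang_segmenter));
+  //   if (result.should_rebuild_index) {
+  //     // Rebuild the index from scratch.
+  //   } else {
+  //     // Remap ids using result.document_id_old_to_new and
+  //     // result.namespace_id_old_to_new.
+  //   }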
- libtextclassifier3::Status OptimizeInto(const std::string& new_directory);
+ libtextclassifier3::StatusOr<OptimizeResult> OptimizeInto(
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats = nullptr) const;
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -309,10 +511,25 @@ class DocumentStore {
// INTERNAL_ERROR on compute error
libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const;
+  // Gets debug information for the document store.
+  // verbosity <= 0: returns the simplest debug information.
+  // verbosity > 0: also returns the total number of documents and tokens in
+  // each (namespace, schema type) pair.
+ //
+ // Returns:
+ // DocumentDebugInfoProto on success
+ // INTERNAL_ERROR on IO errors, crc compute error
+ libtextclassifier3::StatusOr<DocumentDebugInfoProto> GetDebugInfo(
+ int verbosity) const;
+
private:
// Use DocumentStore::Create() to instantiate.
- DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
- const Clock* clock, const SchemaStore* schema_store);
+ explicit DocumentStore(const Filesystem* filesystem,
+ std::string_view base_dir, const Clock* clock,
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint, bool pre_mapping_fbv,
+ bool use_persistent_hash_map,
+ int32_t compression_level);
const Filesystem* const filesystem_;
const std::string base_dir_;
@@ -325,20 +542,39 @@ class DocumentStore {
// Used to validate incoming documents
DocumentValidator document_validator_;
+ // Whether to use namespace id or namespace name to build up fingerprint for
+ // document_key_mapper_ and corpus_mapper_.
+ bool namespace_id_fingerprint_;
+
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ // Flag indicating whether use persistent hash map as the key mapper (if
+ // false, then fall back to dynamic trie key mapper). Note: we only use
+ // persistent hash map for uri mapper if it is true.
+ bool use_persistent_hash_map_;
+
+ const int32_t compression_level_;
+
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_;
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
// Key (namespace + uri) to DocumentId mapping
- std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
+ std::unique_ptr<
+ KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>>
+ document_key_mapper_;
// DocumentId to file offset mapping
std::unique_ptr<FileBackedVector<int64_t>> document_id_mapper_;
// A cache of document associated scores. The ground truth of the scores is
// DocumentProto stored in document_log_. This cache contains:
+ // - CorpusId
// - Document score
// - Document creation timestamp in seconds
+ // - Document length in number of tokens
std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>> score_cache_;
// A cache of data, indexed by DocumentId, used to filter documents. Currently
@@ -348,11 +584,31 @@ class DocumentStore {
// - Expiration timestamp in seconds
std::unique_ptr<FileBackedVector<DocumentFilterData>> filter_cache_;
+ // A cache of corpus associated scores. The ground truth of the scores is
+ // DocumentProto stored in document_log_. This cache contains:
+ // - Number of documents belonging to the corpus score
+ // - The sum of the documents' lengths, in number of tokens.
+ std::unique_ptr<FileBackedVector<CorpusAssociatedScoreData>>
+ corpus_score_cache_;
+
// Maps namespaces to a densely-assigned unique id. Namespaces are assigned an
// id when the first document belonging to that namespace is added to the
// DocumentStore. Namespaces may be removed from the mapper during compaction.
std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_;
+ // Maps a corpus, i.e. a (namespace, schema type) pair, to a densely-assigned
+  // unique id. A corpus is assigned an id when the first document belonging
+  // to that corpus is added to the DocumentStore. Corpus ids may be removed
+  // from the mapper during compaction.
+ std::unique_ptr<
+ KeyMapper<CorpusId, fingerprint_util::FingerprintStringFormatter>>
+ corpus_mapper_;
+
+ // A storage class that caches all usage scores. Usage scores are not
+ // considered as ground truth. Usage scores are associated with document ids
+ // so they need to be updated when document ids change.
+ std::unique_ptr<UsageStore> usage_store_;
+
// Used internally to indicate whether the class has been initialized. This is
// to guard against cases where the object has been created, but Initialize
// fails in the constructor. If we have successfully exited the constructor,
@@ -360,16 +616,31 @@ class DocumentStore {
// worry about this field.
bool initialized_ = false;
- libtextclassifier3::Status Initialize();
+ struct InitializeResult {
+ DataLoss data_loss;
+
+ // A boolean flag indicating if derived files of the document store have
+ // been regenerated or not. This is usually a signal for callers to detect
+ // if any id assignment has changed (e.g. NamespaceId).
+ bool derived_files_regenerated;
+ };
+ libtextclassifier3::StatusOr<InitializeResult> Initialize(
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
+  // This assumes that the underlying files already exist, and will return
+ // an error if it doesn't find what it's expecting.
//
// Returns an error if subcomponents failed to initialize successfully.
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status InitializeDerivedFiles();
+ libtextclassifier3::Status InitializeExistingDerivedFiles();
// Re-generates all files derived from the ground truth: the document log.
//
+  // revalidate_documents=true will also cause each document to be revalidated
+  // against the schema as it is read out of the document log.
+ //
// NOTE: if this function fails, the only thing we can do is to retry it until
// it succeeds or prevent the initialization of a DocumentStore. The
// DocumentStore object wouldn't work reliably if this fails.
@@ -380,7 +651,7 @@ class DocumentStore {
// document_id
// mapper.
// 3. Create header and store the updated combined checksum
- libtextclassifier3::Status RegenerateDerivedFiles();
+ libtextclassifier3::Status RegenerateDerivedFiles(bool revalidate_documents);
// Resets the unique_ptr to the document_key_mapper, deletes the underlying
// file, and re-creates a new instance of the document_key_mapper .
@@ -400,6 +671,12 @@ class DocumentStore {
// Returns OK or any IO errors.
libtextclassifier3::Status ResetDocumentAssociatedScoreCache();
+ // Resets the unique_ptr to the corpus_score_cache, deletes the underlying
+ // file, and re-creates a new instance of the corpus_score_cache.
+ //
+ // Returns OK or any IO errors.
+ libtextclassifier3::Status ResetCorpusAssociatedScoreCache();
+
// Resets the unique_ptr to the filter_cache, deletes the underlying file, and
// re-creates a new instance of the filter_cache.
//
@@ -412,82 +689,137 @@ class DocumentStore {
// Returns OK or any IO errors.
libtextclassifier3::Status ResetNamespaceMapper();
+ // Resets the unique_ptr to the corpus_mapper, deletes the underlying file,
+ // and re-creates a new instance of the corpus_mapper.
+ //
+ // Returns OK or any IO errors.
+ libtextclassifier3::Status ResetCorpusMapper();
+
// Checks if the header exists already. This does not create the header file
// if it doesn't exist.
bool HeaderExists();
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
+ // Update, replace and persist the header file. Creates the header file if it
+ // doesn't exist.
//
// Returns:
// OK on success
// INTERNAL on I/O error
libtextclassifier3::Status UpdateHeader(const Crc32& checksum);
- // Update derived files that `name_space` has been deleted. This is primarily
- // useful if we're trying to update derived files when we've already seen a
- // namespace tombstone, and don't need to write another tombstone.
- //
- // NOTE: Space is not reclaimed in the derived files until Optimize() is
- // called.
- //
- // Returns:
- // bool on whether an existing document was actually updated to be deleted
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<bool> UpdateDerivedFilesNamespaceDeleted(
- std::string_view name_space);
+ libtextclassifier3::StatusOr<DocumentId> InternalPut(
+ DocumentProto&& document,
+ PutDocumentStatsProto* put_document_stats = nullptr);
- // Update derived files that the schema type schema_type_id has been deleted.
- // This is primarily useful if we're trying to update derived files when we've
- // already seen a schema type tombstone, and don't need to write another
- // tombstone.
+ // Helper function to do batch deletes. Documents with the given
+ // "namespace_id" and "schema_type_id" will be deleted. If callers don't need
+ // to specify the namespace or schema type, pass in kInvalidNamespaceId or
+ // kInvalidSchemaTypeId. The document protos with their derived data will be
+ // erased / cleared immediately.
//
// NOTE: Space is not reclaimed in the derived files until Optimize() is
// called.
//
// Returns:
- // OK on success
+ // Number of documents that were actually updated to be deleted
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status UpdateDerivedFilesSchemaTypeDeleted(
- SchemaTypeId schema_type_id);
+ libtextclassifier3::StatusOr<int> BatchDelete(NamespaceId namespace_id,
+ SchemaTypeId schema_type_id);
- // Helper method to find a DocumentId that is associated with the given
- // namespace and uri.
+ // Returns the CorpusAssociatedScoreData of the corpus specified by the
+ // corpus_id.
//
- // NOTE: The DocumentId may refer to a invalid document (deleted
- // or expired). Callers can call DoesDocumentExist(document_id) to ensure it
- // refers to a valid Document.
+ // If the corpus_id has never been seen before, it returns a
+ // CorpusAssociatedScoreData with properties set to default values.
+ //
+ // NOTE: This does not check if the corpus exists and will return the
+ // CorpusAssociatedScoreData of the corpus even if all documents belonging to
+ // that corpus have been deleted.
//
// Returns:
- // A DocumentId on success
- // NOT_FOUND if the key doesn't exist
- // INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
- std::string_view name_space, std::string_view uri) const;
+ // CorpusAssociatedScoreData on success
+ libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+ GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const;
- // Helper method to validate the document id and return the file offset of the
- // associated document in document_log_.
- //
- // This can be a more informative call than just DoesDocumentExist because it
- // can return more status errors on whether the Document actually doesn't
- // exist or if there was an internal error while accessing files.
+ // Check if a document exists. Existence means it hasn't been deleted and it
+ // hasn't expired yet.
//
// Returns:
- // The file offset on success
+ // OK if the document exists
// INVALID_ARGUMENT if document_id is less than 0 or greater than the
// maximum value
// NOT_FOUND if the document doesn't exist (i.e. deleted or expired)
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> DoesDocumentExistAndGetFileOffset(
+ libtextclassifier3::Status DoesDocumentExistWithStatus(
DocumentId document_id) const;
+  // Checks if a document has been deleted.
+ //
+ // This is for internal-use only because we assume that the document_id is
+ // already valid. If you're unsure if the document_id is valid, use
+ // DoesDocumentExist(document_id) instead, which will perform those additional
+ // checks.
+ bool IsDeleted(DocumentId document_id) const;
+
+ // Checks if a document has expired.
+ //
+ // This is for internal-use only because we assume that the document_id is
+ // already valid. If you're unsure if the document_id is valid, use
+ // DoesDocumentExist(document_id) instead, which will perform those additional
+ // checks.
+  //
+  // Returns:
+  //   A DocumentFilterData if the given document isn't expired.
+  //   std::nullopt if the given document is expired.
+ std::optional<DocumentFilterData> GetNonExpiredDocumentFilterData(
+ DocumentId document_id, int64_t current_time_ms) const;
+
// Updates the entry in the score cache for document_id.
libtextclassifier3::Status UpdateDocumentAssociatedScoreCache(
DocumentId document_id, const DocumentAssociatedScoreData& score_data);
+ // Updates the entry in the corpus score cache for corpus_id.
+ libtextclassifier3::Status UpdateCorpusAssociatedScoreCache(
+ CorpusId corpus_id, const CorpusAssociatedScoreData& score_data);
+
// Updates the entry in the filter cache for document_id.
libtextclassifier3::Status UpdateFilterCache(
DocumentId document_id, const DocumentFilterData& filter_data);
+
+ // Helper method to clear the derived data of a document
+ libtextclassifier3::Status ClearDerivedData(DocumentId document_id);
+
+ // Sets usage scores for the given document.
+ libtextclassifier3::Status SetUsageScores(
+ DocumentId document_id, const UsageStore::UsageScores& usage_scores);
+
+ // Returns:
+ // - on success, a DocumentStorageInfoProto with the fields relating to the
+ // size of Document Store member variables populated.
+ // - INTERNAL on failure to get file size
+ DocumentStorageInfoProto GetMemberStorageInfo() const;
+
+ // Returns:
+ // - on success, the storage_info that was passed in but with the number of
+ // alive, deleted and expired documents also set.
+ // - OUT_OF_RANGE, this should never happen. This could only be returned if
+ // the document_id_mapper somehow became larger than the filter cache.
+ DocumentStorageInfoProto CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const;
+
+ // Returns:
+ // - on success, a RepeatedPtrField for CorpusInfo collected.
+ // - OUT_OF_RANGE, this should never happen.
+ libtextclassifier3::StatusOr<
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
+ CollectCorpusInfo() const;
+
+ // Build fingerprint for the keys of document_key_mapper_ and corpus_mapper_.
+ // Note that namespace_id_fingerprint_ controls the way that a fingerprint is
+ // built.
+ std::string MakeFingerprint(NamespaceId namespace_id,
+ std::string_view namespace_,
+ std::string_view uri_or_schema) const;
};
} // namespace lib
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
new file mode 100644
index 0000000..46d76d8
--- /dev/null
+++ b/icing/store/document-store_benchmark.cc
@@ -0,0 +1,342 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <random>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/clock.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/store:document-store_benchmark
+//
+// $ blaze-bin/icing/store/document-store_benchmark
+// --benchmark_filter=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/store:document-store_benchmark
+//
+// $ adb push blaze-bin/icing/store/document-store_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/document-store_benchmark
+// --benchmark_filter=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class DestructibleDirectory {
+ public:
+ explicit DestructibleDirectory(const Filesystem& filesystem,
+ const std::string& dir)
+ : filesystem_(filesystem), dir_(dir) {
+ filesystem_.CreateDirectoryRecursively(dir_.c_str());
+ }
+ ~DestructibleDirectory() {
+ filesystem_.DeleteDirectoryRecursively(dir_.c_str());
+ }
+
+ private:
+ Filesystem filesystem_;
+ std::string dir_;
+};
+
+DocumentProto CreateDocument(const std::string namespace_,
+ const std::string uri) {
+ return DocumentBuilder()
+ .SetKey(namespace_, uri)
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .Build();
+}
+
+SchemaProto CreateSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+}
+
+std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem,
+ const std::string directory,
+ const Clock* clock) {
+ const std::string schema_store_dir = directory + "/schema";
+ filesystem.CreateDirectoryRecursively(schema_store_dir.data());
+ std::unique_ptr<SchemaStore> schema_store =
+ SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
+
+ auto set_schema_status = schema_store->SetSchema(
+ CreateSchema(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
+ if (!set_schema_status.ok()) {
+ ICING_LOG(ERROR) << set_schema_status.status().error_message();
+ }
+
+ return schema_store;
+}
+
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+}
+
+void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ int max_document_id = 300000;
+ for (int i = 0; i < max_document_id; ++i) {
+ // Put and delete a lot of documents to fill up our derived files with
+ // stuff.
+ ICING_ASSERT_OK(document_store->Put(
+ CreateDocument("namespace", /*uri=*/std::to_string(i))));
+ ICING_ASSERT_OK(document_store->Delete("namespace",
+ /*uri=*/std::to_string(i),
+ clock.GetSystemTimeMilliseconds()));
+ }
+
+ std::default_random_engine random;
+ std::uniform_int_distribution<> dist(1, max_document_id);
+ for (auto s : state) {
+    // Check random document ids to see if they exist, hopefully simulating
+    // page faults in different sections of our mmapped derived files.
+ int document_id = dist(random);
+ benchmark::DoNotOptimize(document_store->GetAliveDocumentFilterData(
+ document_id, clock.GetSystemTimeMilliseconds()));
+ }
+}
+BENCHMARK(BM_DoesDocumentExistBenchmark);
+
+void BM_Put(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+
+ for (auto s : state) {
+ // It's ok that this is the same document over and over. We'll create a new
+ // document_id for it and still insert the proto into the underlying log.
+ benchmark::DoNotOptimize(document_store->Put(document));
+ }
+}
+BENCHMARK(BM_Put);
+
+void BM_GetSameDocument(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", "uri")));
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(document_store->Get("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_GetSameDocument);
+
+void BM_Delete(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+
+ for (auto s : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK(document_store->Put(document));
+ state.ResumeTiming();
+
+ benchmark::DoNotOptimize(document_store->Delete(
+ "namespace", "uri", clock.GetSystemTimeMilliseconds()));
+ }
+}
+BENCHMARK(BM_Delete);
+
+void BM_Create(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ std::string document_store_dir = directory + "/store";
+
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ // Create an initial document store and put some data in.
+ {
+ DestructibleDirectory ddir(filesystem, directory);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+ ICING_ASSERT_OK(document_store->Put(document));
+ ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::FULL));
+ }
+
+ // Recreating it with some content to checksum over.
+ DestructibleDirectory ddir(filesystem, directory);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(CreateDocumentStore(
+ &filesystem, document_store_dir, &clock, schema_store.get()));
+ }
+}
+BENCHMARK(BM_Create);
+
+void BM_ComputeChecksum(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+ ICING_ASSERT_OK(document_store->Put(document));
+ ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::LITE));
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(document_store->ComputeChecksum());
+ }
+}
+BENCHMARK(BM_ComputeChecksum);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index ad56b9a..2d4cd99 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -15,10 +15,13 @@
#include "icing/store/document-store.h"
#include <cstdint>
+#include <filesystem>
#include <limits>
#include <memory>
+#include <optional>
#include <string>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
@@ -28,23 +31,44 @@
#include "icing/file/memory-mapped-file.h"
#include "icing/file/mock-filesystem.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-associated-scoring-data.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
#include "icing/util/crc32.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
+namespace {
+
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
+using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
@@ -54,15 +78,70 @@ using ::testing::Not;
using ::testing::Return;
using ::testing::UnorderedElementsAre;
-class DocumentStoreTest : public ::testing::Test {
+const NamespaceStorageInfoProto& GetNamespaceStorageInfo(
+ const DocumentStorageInfoProto& storage_info,
+ const std::string& name_space) {
+ for (const NamespaceStorageInfoProto& namespace_storage_info :
+ storage_info.namespace_storage_info()) {
+ if (namespace_storage_info.namespace_() == name_space) {
+ return namespace_storage_info;
+ }
+ }
+ // Didn't find our namespace, fail the test.
+ EXPECT_TRUE(false) << "Failed to find namespace '" << name_space
+ << "' in DocumentStorageInfoProto.";
+ static const auto& default_namespace_storage_info =
+ *new NamespaceStorageInfoProto();
+ return default_namespace_storage_info;
+}
+
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
+struct DocumentStoreTestParam {
+ bool namespace_id_fingerprint;
+ bool pre_mapping_fbv;
+ bool use_persistent_hash_map;
+
+ explicit DocumentStoreTestParam(bool namespace_id_fingerprint_in,
+ bool pre_mapping_fbv_in,
+ bool use_persistent_hash_map_in)
+ : namespace_id_fingerprint(namespace_id_fingerprint_in),
+ pre_mapping_fbv(pre_mapping_fbv_in),
+ use_persistent_hash_map(use_persistent_hash_map_in) {}
+};
+
+class DocumentStoreTest
+ : public ::testing::TestWithParam<DocumentStoreTestParam> {
protected:
DocumentStoreTest()
: test_dir_(GetTestTempDir() + "/icing"),
document_store_dir_(test_dir_ + "/document_store"),
schema_store_dir_(test_dir_ + "/schema_store") {
- filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
- filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
test_document1_ =
DocumentBuilder()
.SetKey("icing", "email/1")
@@ -88,37 +167,86 @@ class DocumentStoreTest : public ::testing::Test {
}
void SetUp() override {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- auto subject = type_config->add_properties();
- subject->set_property_name("subject");
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subject->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- subject->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
- auto body = type_config->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
}
void TearDown() override {
+ lang_segmenter_.reset();
+ schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+ void CorruptDocStoreHeaderChecksumFile() {
+ // Change the DocStore's header combined checksum so that it won't match the
+ // recalculated checksum on initialization. This will force a regeneration
+ // of derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::GetCurrentMagic(
+ GetParam().namespace_id_fingerprint);
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ }
+
+ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store) {
+ return DocumentStore::Create(
+ filesystem, base_dir, clock, schema_store,
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr);
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
FakeClock fake_clock_;
@@ -127,6 +255,7 @@ class DocumentStoreTest : public ::testing::Test {
DocumentProto test_document1_;
DocumentProto test_document2_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
// Document1 values
const int document1_score_ = 1;
@@ -142,34 +271,36 @@ class DocumentStoreTest : public ::testing::Test {
const int64_t document2_expiration_timestamp_ = 3; // creation + ttl
};
-TEST_F(DocumentStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(DocumentStore::Create(/*filesystem=*/nullptr, document_store_dir_,
- &fake_clock_, schema_store_.get()),
+TEST_P(DocumentStoreTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(CreateDocumentStore(/*filesystem=*/nullptr, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_,
- /*clock=*/nullptr, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_,
+ /*clock=*/nullptr, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, /*schema_store=*/nullptr),
+ EXPECT_THAT(CreateDocumentStore(&filesystem_, document_store_dir_,
+ &fake_clock_, /*schema_store=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
+TEST_P(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
MockFilesystem mock_filesystem;
ON_CALL(mock_filesystem, OpenForWrite(_)).WillByDefault(Return(false));
- EXPECT_THAT(DocumentStore::Create(&mock_filesystem, document_store_dir_,
- &fake_clock_, schema_store_.get()),
+ EXPECT_THAT(CreateDocumentStore(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
+TEST_P(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Both documents have namespace of "icing"
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
@@ -183,11 +314,13 @@ TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
IsOkAndHolds(EqualsProto(test_document2_)));
}
-TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
+TEST_P(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Can handle different namespaces with same url
DocumentProto foo_document = DocumentBuilder()
@@ -214,11 +347,13 @@ TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
// Validates that putting an document with the same key will overwrite previous
// document and old doc ids are not getting reused.
-TEST_F(DocumentStoreTest, PutSameKey) {
+TEST_P(DocumentStoreTest, PutSameKey) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Creates two documents with the same key (namespace + uri)
DocumentProto document1 = DocumentProto(test_document1_);
@@ -241,53 +376,64 @@ TEST_F(DocumentStoreTest, PutSameKey) {
EXPECT_THAT(doc_store->Put(document3), IsOkAndHolds(Not(document_id1)));
}
-TEST_F(DocumentStoreTest, IsDocumentExisting) {
+TEST_P(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(DocumentProto(test_document1_)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(DocumentProto(test_document2_)));
- EXPECT_THAT(doc_store->DoesDocumentExist(document_id1), IsTrue());
- EXPECT_THAT(doc_store->DoesDocumentExist(document_id2), IsTrue());
+ EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(
+ document_id1, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_TRUE(doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
DocumentId invalid_document_id_negative = -1;
- EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_negative),
- IsFalse());
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ invalid_document_id_negative, fake_clock_.GetSystemTimeMilliseconds()));
DocumentId invalid_document_id_greater_than_max = kMaxDocumentId + 2;
- EXPECT_THAT(
- doc_store->DoesDocumentExist(invalid_document_id_greater_than_max),
- IsFalse());
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ invalid_document_id_greater_than_max,
+ fake_clock_.GetSystemTimeMilliseconds()));
- EXPECT_THAT(doc_store->DoesDocumentExist(kInvalidDocumentId), IsFalse());
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ kInvalidDocumentId, fake_clock_.GetSystemTimeMilliseconds()));
DocumentId invalid_document_id_out_of_range = document_id2 + 1;
- EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_out_of_range),
- IsFalse());
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ invalid_document_id_out_of_range,
+ fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
+TEST_P(DocumentStoreTest, GetDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_)));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
IsOkAndHolds(EqualsProto(test_document1_)));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
+TEST_P(DocumentStoreTest, GetExpiredDocumentNotFound) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "uri")
.SetSchema("email")
@@ -296,9 +442,12 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(document));
EXPECT_THAT(document_store->Get("namespace", "uri"),
IsOkAndHolds(EqualsProto(document)));
@@ -319,11 +468,14 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
+TEST_P(DocumentStoreTest, GetInvalidDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(DocumentProto(test_document1_)));
@@ -343,62 +495,90 @@ TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteOk) {
+TEST_P(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
- // Get() after Delete() returns NOT_FOUND
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(DocumentProto(test_document1_)));
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
- EXPECT_THAT(doc_store->Get(document_id),
+ // Validates that deleting a nonexistent document won't append anything to
+ // the ground truth.
+ int64_t document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+
+ EXPECT_THAT(document_store->Delete("nonexistent_namespace", "nonexistent_uri",
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
+TEST_P(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Validates that deleting a nonexistent document won't append anything to
// the ground truth.
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(
- document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ int64_t document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+
+ libtextclassifier3::Status status = document_store->Delete(
+ "android$contacts/", "661", fake_clock_.GetSystemTimeMilliseconds());
+ EXPECT_THAT(status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ for (char c : status.error_message()) {
+ EXPECT_THAT(std::isprint(static_cast<unsigned char>(c)), IsTrue());
+ }
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
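
// The loop above asserts that NOT_FOUND error messages stay printable even
// when the document key contains characters like '$' or '/'. A sketch of the
// same check as a reusable helper (hypothetical, not part of this change),
// assuming <cctype> and <string> are included; casting to unsigned char keeps
// std::isprint well-defined for all byte values.
bool IsPrintableMessage(const std::string& message) {
  for (char c : message) {
    if (!std::isprint(static_cast<unsigned char>(c))) {
      return false;
    }
  }
  return true;
}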
-TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
+TEST_P(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
// First time is OK
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
// Deleting it again is NOT_FOUND
EXPECT_THAT(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()),
+ test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 = test_document1_;
document1.set_namespace_("namespace.1");
@@ -422,7 +602,10 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
// DELETE namespace.1. document1 and document4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1"));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
@@ -433,42 +616,53 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Validates that deleting a nonexistent document won't append anything to
// the ground truth.
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ int64_t document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
- EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace"),
+ EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since no existing document has this namespace.
- EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ document_store->DeleteByNamespace(test_document1_.namespace_()).status,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
DocumentProto document1 = test_document1_;
document1.set_namespace_("namespace.1");
document1.set_uri("uri1");
@@ -485,12 +679,15 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
document4.set_namespace_("namespace.1");
document4.set_uri("uri2");
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK(doc_store->Put(document1));
ICING_ASSERT_OK(doc_store->Put(document2));
ICING_ASSERT_OK(doc_store->Put(document3));
@@ -498,33 +695,32 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
// DELETE namespace.1. document1 and document4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1"));
-
- ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
+
+ document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -536,28 +732,31 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
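
// CorruptDocStoreHeaderChecksumFile() factors out the inline header
// corruption that the removed lines above performed. A sketch consistent
// with those removed lines (the real helper lives in the test fixture and
// may differ in detail):
void DocumentStoreTest::CorruptDocStoreHeaderChecksumFile() {
  // Overwrite the header with an arbitrary garbage checksum so that the
  // recalculated checksum won't match on initialization, forcing a
  // regeneration of derived files from the ground-truth document log.
  const std::string header_file =
      absl_ports::StrCat(document_store_dir_, "/document_store_header");
  DocumentStore::Header header;
  header.magic = DocumentStore::Header::kMagic;
  header.checksum = 10;  // Arbitrary garbage checksum
  filesystem_.DeleteFile(header_file.c_str());
  filesystem_.Write(header_file.c_str(), &header, sizeof(header));
}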
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("person");
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("person"))
+ .Build();
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentProto email_document_1 = DocumentBuilder()
.SetKey("namespace1", "1")
@@ -593,7 +792,10 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -604,7 +806,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("message"));
+ group_result = document_store->DeleteBySchemaType("message");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -615,56 +819,67 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
IsOkAndHolds(EqualsProto(person_document)));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Validates that deleting a nonexistent document won't append anything to
// the ground truth.
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ int64_t document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
- EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type"),
+ EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsOk) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
- // At this point, there are no existing documents with the schema type, but we
- // still return OK because the SchemaStore is the ground truth on schemas and
- // knows about the type
- ICING_EXPECT_OK(document_store->DeleteBySchemaType(test_document1_.schema()));
+ EXPECT_THAT(
+ document_store->DeleteBySchemaType(test_document1_.schema()).status,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentId email_document_id;
DocumentId message_document_id;
@@ -680,12 +895,14 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(email_document_id,
document_store->Put(email_document));
@@ -693,33 +910,32 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
document_store->Put(message_document));
// Delete "email". "message" documents should still be retrievable.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
-
- ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
+ document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -727,21 +943,37 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+TEST_P(DocumentStoreTest, PutDeleteThenPut) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_EXPECT_OK(doc_store->Put(test_document1_));
+ ICING_EXPECT_OK(doc_store->Delete(test_document1_.namespace_(),
+ test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_EXPECT_OK(doc_store->Put(test_document1_));
+}
+
+TEST_P(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentId email_document_id;
DocumentId message_document_id;
@@ -757,12 +989,14 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(email_document_id,
document_store->Put(email_document));
@@ -770,45 +1004,46 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
document_store->Put(message_document));
// Delete "email". "message" documents should still be retrievable.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ document_log_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
- SchemaProto new_schema;
- type_config = new_schema.add_types();
- type_config->set_schema_type("message");
+ CorruptDocStoreHeaderChecksumFile();
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(
- new_schema, /*ignore_errors_and_delete_documents=*/true));
+ new_schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ int64_t document_log_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -816,11 +1051,13 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, OptimizeInto) {
+TEST_P(DocumentStoreTest, OptimizeIntoSingleNamespace) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 = DocumentBuilder()
.SetKey("namespace", "uri1")
@@ -850,33 +1087,50 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK(doc_store->Put(document2));
ICING_ASSERT_OK(doc_store->Put(document3));
- std::string original_document_log = document_store_dir_ + "/document_log";
+ std::string original_document_log = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
int64_t original_size =
filesystem_.GetFileSize(original_document_log.c_str());
// Optimizing into the same directory is not allowed
- EXPECT_THAT(doc_store->OptimizeInto(document_store_dir_),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("directory is the same")));
+ EXPECT_THAT(
+ doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("directory is the same")));
std::string optimized_dir = document_store_dir_ + "_optimize";
- std::string optimized_document_log = optimized_dir + "/document_log";
+ std::string optimized_document_log =
+ optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
// Validates that the optimized document log has the same size if nothing is
- // deleted
+ // deleted. Also namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result1,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result1.document_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse());
int64_t optimized_size1 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_EQ(original_size, optimized_size1);
// Validates that the optimized document log has a smaller size if something
- // is deleted
+ // is deleted. Namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1"));
- ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+ ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // DocumentId 0 is removed.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result2,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result2.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, 1));
+ EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse());
int64_t optimized_size2 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(original_size, Gt(optimized_size2));
@@ -886,32 +1140,307 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
fake_clock_.SetSystemTimeMilliseconds(300);
// Validates that the optimized document log has a smaller size if something
- // expired
+ // expired. Namespace ids remain the same.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // DocumentId 0 is removed, and DocumentId 2 is expired.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result3,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result3.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse());
+ int64_t optimized_size3 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size2, Gt(optimized_size3));
+
+ // Delete the last document
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // DocumentIds 0 and 1 are removed, and DocumentId 2 is expired. Since no
+ // document with the namespace is added into the new document store, the
+ // namespace id will be invalid.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result4,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(
+ optimize_result4.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result4.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse());
+ int64_t optimized_size4 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size3, Gt(optimized_size4));
+}
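
// OptimizeInto() now returns an OptimizeResult whose document_id_old_to_new
// vector maps each pre-optimize DocumentId to its compacted position, with
// kInvalidDocumentId marking deleted or expired slots. A sketch of how a
// caller might translate ids through the result, assuming
// document_id_old_to_new is a std::vector<DocumentId> (TranslateId is a
// hypothetical helper, not part of this change):
DocumentId TranslateId(const std::vector<DocumentId>& old_to_new,
                       DocumentId old_id) {
  // Ids past the end of the mapping were never assigned; treat them as
  // invalid, just like dropped documents.
  if (old_id < 0 || old_id >= static_cast<DocumentId>(old_to_new.size())) {
    return kInvalidDocumentId;
  }
  return old_to_new[old_id];
}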
+
+TEST_P(DocumentStoreTest, OptimizeIntoMultipleNamespaces) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("namespace1", "uri0")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace3", "uri4")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ // Nothing should have expired yet.
+ fake_clock_.SetSystemTimeMilliseconds(100);
+
+ ICING_ASSERT_OK(doc_store->Put(document0));
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+ ICING_ASSERT_OK(doc_store->Put(document3));
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+ std::string original_document_log = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
+ int64_t original_size =
+ filesystem_.GetFileSize(original_document_log.c_str());
+
+ std::string optimized_dir = document_store_dir_ + "_optimize";
+ std::string optimized_document_log =
+ optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
+
+ // Validates that the optimized document log has the same size if nothing is
+ // deleted. Also namespace ids remain the same.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result1,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result1.document_id_old_to_new,
+ ElementsAre(0, 1, 2, 3, 4));
+ EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse());
+ int64_t optimized_size1 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_EQ(original_size, optimized_size1);
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+ // Delete DocumentId 0 with namespace1.
+ // - Before: ["namespace1#uri0", "namespace1#uri1", "namespace2#uri2",
+ // "namespace1#uri3", "namespace3#uri4"]
+ // - After: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace1",
+ // "namespace2", "namespace3"] order. Since new_doc_store has the same order
+ // of namespace id assignment, namespace ids remain the same.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri0",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result2,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result2.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, 1, 2, 3));
+ EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse());
+ int64_t optimized_size2 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(original_size, Gt(optimized_size2));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 1 with namespace1.
+ // - Before: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2",
+ // "namespace1", "namespace3"] order, so namespace_id_old_to_new should
+ // reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result3,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result3.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0, 1, 2));
+ EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(1, 0, 2));
+ EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse());
int64_t optimized_size3 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(optimized_size2, Gt(optimized_size3));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 3 with namespace1.
+ // - Before: [nil, nil, "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2",
+ // "namespace3"] order and "namespace1" will be never assigned, so
+ // namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri3",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result4,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result4.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0,
+ kInvalidDocumentId, 1));
+ EXPECT_THAT(optimize_result4.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, 0, 1));
+ EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse());
+ int64_t optimized_size4 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size3, Gt(optimized_size4));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 4 with namespace3.
+ // - Before: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", nil, nil]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2"]
+ // order and "namespace1" and "namespace3" will never be assigned, so
+ // namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace3", "uri4",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result5,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result5.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0,
+ kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result5.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, 0, kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result5.should_rebuild_index, IsFalse());
+ int64_t optimized_size5 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size4, Gt(optimized_size5));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 2 with namespace2.
+ // - Before: [nil, nil, "namespace2#uri2", nil, nil]
+ // - After: [nil, nil, nil, nil, nil]
+ // In this case, all documents were deleted, so there will be no namespace ids
+ // either. namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace2", "uri2",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result6,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(
+ optimize_result6.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId,
+ kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result6.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, kInvalidNamespaceId,
+ kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result6.should_rebuild_index, IsFalse());
+ int64_t optimized_size6 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size5, Gt(optimized_size6));
}
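
// The comments in the test above describe the reassignment rule: the
// optimized store hands out namespace ids 0, 1, 2, ... in the order in which
// namespaces are first encountered among surviving documents, and namespaces
// with no surviving documents map to kInvalidNamespaceId. A sketch of
// computing the expected mapping under that rule (hypothetical helper, not
// part of this change):
std::vector<NamespaceId> ExpectedNamespaceMapping(
    const std::vector<NamespaceId>& surviving_docs_old_namespace_ids,
    int num_old_namespaces) {
  std::vector<NamespaceId> old_to_new(num_old_namespaces,
                                      kInvalidNamespaceId);
  NamespaceId next_new_id = 0;
  // Walk surviving documents in document-id order; each namespace gets its
  // new id the first time it is seen.
  for (NamespaceId old_id : surviving_docs_old_namespace_ids) {
    if (old_to_new[old_id] == kInvalidNamespaceId) {
      old_to_new[old_id] = next_new_id++;
    }
  }
  return old_to_new;
}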
-TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
+TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ std::string optimized_dir = document_store_dir_ + "_optimize";
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result.document_id_old_to_new, IsEmpty());
+ EXPECT_THAT(optimize_result.namespace_id_old_to_new, IsEmpty());
+ EXPECT_THAT(optimize_result.should_rebuild_index, IsFalse());
+}
+
+TEST_P(DocumentStoreTest, ShouldRecoverFromDataLoss) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(document_id1,
- doc_store->Put(DocumentProto(test_document1_)));
- ICING_ASSERT_OK_AND_ASSIGN(document_id2,
- doc_store->Put(DocumentProto(test_document2_)));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id1,
+ doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id2,
+ doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
EXPECT_THAT(doc_store->Get(document_id1),
IsOkAndHolds(EqualsProto(test_document1_)));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+
+ // Delete document 1
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
@@ -924,59 +1453,98 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
-
// Checks derived filter cache
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
- IsOkAndHolds(DocumentFilterData(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
+
// Checks derived score cache
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
- IsOkAndHolds(DocumentAssociatedScoreData(
- document2_score_, document2_creation_timestamp_)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(document_id1,
- doc_store->Put(DocumentProto(test_document1_)));
- ICING_ASSERT_OK_AND_ASSIGN(document_id2,
- doc_store->Put(DocumentProto(test_document2_)));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id1,
+ doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id2,
+ doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
EXPECT_THAT(doc_store->Get(document_id1),
IsOkAndHolds(EqualsProto(test_document1_)));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ // Delete document 1
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2",
+ /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)),
+ IsOk());
}
- // "Corrupt" one of the derived files by adding non-checksummed data to
- // it. This will mess up the checksum and throw an error on the derived file's
- // initialization.
+ // "Corrupt" one of the derived files by modifying an existing data without
+ // calling PersistToDisk() or updating its checksum. This will mess up the
+ // checksum and throw an error on the derived file's initialization.
const std::string document_id_mapper_file =
absl_ports::StrCat(document_store_dir_, "/document_id_mapper");
ICING_ASSERT_OK_AND_ASSIGN(
@@ -984,94 +1552,243 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
FileBackedVector<int64_t>::Create(
filesystem_, document_id_mapper_file,
MemoryMappedFile::READ_WRITE_AUTO_SYNC));
- int64_t corrupt_document_id = 3;
- int64_t corrupt_offset = 3;
+ int64_t corrupt_document_id = 1;
+ int64_t corrupt_offset = 123456;
EXPECT_THAT(document_id_mapper->Set(corrupt_document_id, corrupt_offset),
IsOk());
+ // Initializing the document id mapper file will return an error, which
+ // triggers RegenerateDerivedFiles.
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
// Checks derived filter cache
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
- IsOkAndHolds(DocumentFilterData(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
+
// Checks derived score cache
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
- IsOkAndHolds(DocumentAssociatedScoreData(
- document2_score_, document2_creation_timestamp_)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
+
+ // Checks usage score data. Note that usage scores aren't regenerated from
+ // scratch.
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id2,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(document_id1,
- doc_store->Put(DocumentProto(test_document1_)));
- ICING_ASSERT_OK_AND_ASSIGN(document_id2,
- doc_store->Put(DocumentProto(test_document2_)));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id1,
+ doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id2,
+ doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
EXPECT_THAT(doc_store->Get(document_id1),
IsOkAndHolds(EqualsProto(test_document1_)));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
- EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ // Delete document 1
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2",
+ /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)),
+ IsOk());
}
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ // Discard all derived files.
+ ICING_ASSERT_OK(
+ DocumentStore::DiscardDerivedFiles(&filesystem_, document_store_dir_));
- // Successfully recover from a corrupt derived file issue.
+ // Successfully recover after discarding all derived files.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+
+ // Checks derived filter cache
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0, document2_expiration_timestamp_)));
+
+ // Checks derived score cache.
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
+
+ // Checks usage score data - note that they aren't regenerated from
+ // scratch.
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id2,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
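// A hedged summary, inferred from the test above rather than from the
// implementation: DiscardDerivedFiles removes everything that can be
// rebuilt from the document log (filter cache, document and corpus score
// caches, namespace and corpus mappers), and the next Create() regenerates
// them from ground truth. Usage scores have no ground truth to replay, so
// they persist in their own file; that is why document_id2's
// usage_type1_count of 1 survives the round trip.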
+TEST_P(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
+ DocumentId document_id1, document_id2;
+ {
+ // Can put and delete fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id1,
+ doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id2,
+ doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
+ EXPECT_THAT(doc_store->Get(document_id1),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ EXPECT_THAT(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()),
+ IsOk());
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+ }
+
+ CorruptDocStoreHeaderChecksumFile();
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
// Checks derived filter cache
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
- IsOkAndHolds(DocumentFilterData(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id2, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
// Checks derived score cache
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
- IsOkAndHolds(DocumentAssociatedScoreData(
- document2_score_, document2_creation_timestamp_)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
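// For reference, a sketch of what CorruptDocStoreHeaderChecksumFile() is
// presumed to do, mirroring the inline block this change factors out of
// the tests: rewrite the header with the correct magic but a garbage
// checksum, so the next Create() detects the mismatch and regenerates
// derived files from ground truth.
void CorruptDocStoreHeaderChecksumFileSketch(const Filesystem& filesystem,
                                             const std::string& dir) {
  const std::string header_file =
      absl_ports::StrCat(dir, "/document_store_header");
  DocumentStore::Header header;
  header.magic = DocumentStore::Header::kMagic;
  header.checksum = 10;  // Arbitrary garbage checksum.
  filesystem.DeleteFile(header_file.c_str());
  filesystem.Write(header_file.c_str(), &header, sizeof(header));
}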
-TEST_F(DocumentStoreTest, GetDiskUsage) {
+TEST_P(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size,
- doc_store->GetDiskUsage());
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentStorageInfoProto doc_store_storage_info = doc_store->GetStorageInfo();
+ int64_t empty_doc_store_size = doc_store_storage_info.document_store_size();
EXPECT_THAT(empty_doc_store_size, Gt(0));
DocumentProto document = DocumentBuilder()
@@ -1080,33 +1797,38 @@ TEST_F(DocumentStoreTest, GetDiskUsage) {
.AddStringProperty("subject", "foo")
.Build();
- // Since our GetDiskUsage can only get sizes in increments of block_size, we
+ // Since GetStorageInfo can only get sizes in increments of block_size, we
// need to insert enough documents so the disk usage will increase by at least
  // 1 block size. The number 100 is somewhat arbitrary, chosen based on
  // manual testing.
for (int i = 0; i < 100; ++i) {
ICING_ASSERT_OK(doc_store->Put(document));
}
- EXPECT_THAT(doc_store->GetDiskUsage(),
- IsOkAndHolds(Gt(empty_doc_store_size)));
+ doc_store_storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(),
+ Gt(empty_doc_store_size));
// Bad file system
MockFilesystem mock_filesystem;
- ON_CALL(mock_filesystem, GetDiskUsage(A<const char *>()))
+ ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>()))
.WillByDefault(Return(Filesystem::kBadFileSize));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem,
- DocumentStore::Create(&mock_filesystem, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ create_result, CreateDocumentStore(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
+ std::move(create_result.document_store);
+
+ doc_store_storage_info = doc_store_with_mock_filesystem->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1));
}
-TEST_F(DocumentStoreTest, MaxDocumentId) {
+TEST_P(DocumentStoreTest, MaxDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Since the DocumentStore is empty, we get an invalid DocumentId
EXPECT_THAT(doc_store->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -1116,7 +1838,8 @@ TEST_F(DocumentStoreTest, MaxDocumentId) {
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id1));
// Still returns the last DocumentId even if it was deleted
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -1124,11 +1847,13 @@ TEST_F(DocumentStoreTest, MaxDocumentId) {
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id2));
}
-TEST_F(DocumentStoreTest, GetNamespaceId) {
+TEST_P(DocumentStoreTest, GetNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document_namespace1 =
DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
@@ -1145,15 +1870,23 @@ TEST_F(DocumentStoreTest, GetNamespaceId) {
// DocumentStore
EXPECT_THAT(doc_store->GetNamespaceId("namespace2"), IsOkAndHolds(Eq(1)));
+ // DELETE namespace1 - document_namespace1 is deleted.
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
// NamespaceMapper doesn't care if the document has been deleted
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
+TEST_P(DocumentStoreTest, GetDuplicateNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 =
DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
@@ -1167,43 +1900,431 @@ TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
+TEST_P(DocumentStoreTest, NonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
EXPECT_THAT(doc_store->GetNamespaceId("nonexistent_namespace"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, FilterCacheHoldsDeletedDocumentData) {
+TEST_P(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+  // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
+ IsOkAndHolds(Eq(0)));
+}
+
+TEST_P(DocumentStoreTest, GetCorpusId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document_corpus1 =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ DocumentProto document_corpus2 =
+ DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus1)));
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus2)));
+
+ // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "email"),
+ IsOkAndHolds(Eq(0)));
+
+ // CorpusId of 1 since it was the second corpus seen by the
+ // DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace2", "email"),
+ IsOkAndHolds(Eq(1)));
+
+ // DELETE namespace1 - document_corpus1 is deleted.
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
+ // CorpusMapper doesn't care if the document has been deleted
+  EXPECT_THAT(doc_store->GetCorpusId("namespace1", "email"),
+              IsOkAndHolds(Eq(0)));
+}
+
+TEST_P(DocumentStoreTest, NonexistentCorpusNotFound) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(
+ doc_store->GetCorpusId("nonexistent_namespace", "nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ DocumentProto document_corpus =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus)));
+
+ EXPECT_THAT(doc_store->GetCorpusId("nonexistent_namespace", "email"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_P(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(document1, /*num_tokens=*/5));
+ ICING_ASSERT_OK(doc_store->Put(document2, /*num_tokens=*/7));
+
+  // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/12)));
+ // Only one corpus exists
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_P(DocumentStoreTest, GetCorpusAssociatedScoreData) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document_corpus1 =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ DocumentProto document_corpus2 =
+ DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(
+ doc_store->Put(DocumentProto(document_corpus1), /*num_tokens=*/5));
+ ICING_ASSERT_OK(
+ doc_store->Put(DocumentProto(document_corpus2), /*num_tokens=*/7));
+
+ // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/5)));
+
+ // CorpusId of 1 since it was the second corpus seen by the
+ // DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/1),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/7)));
+
+ // DELETE namespace1 - document_corpus1 is deleted.
+ ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace1").status);
+
+ // Corpus score cache doesn't care if the document has been deleted
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/5)));
+}
+
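// A minimal sketch of the per-corpus bookkeeping these corpus-score tests
// rely on; the struct and helper are illustrative, not the real
// CorpusAssociatedScoreData implementation. Each Put(document, num_tokens)
// bumps the counters of the document's corpus, and deletes intentionally
// leave the aggregates untouched.
struct CorpusAggregatesSketch {
  int num_docs = 0;
  int64_t sum_length_in_tokens = 0;
};

void OnPutSketch(CorpusAggregatesSketch& corpus, int64_t num_tokens) {
  ++corpus.num_docs;
  corpus.sum_length_in_tokens += num_tokens;
}
// For example, two Puts with num_tokens 5 and 7 into one corpus yield
// num_docs == 2 and sum_length_in_tokens == 12, matching
// GetCorpusAssociatedScoreDataSameCorpus above.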
+TEST_P(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_P(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("email")
+ .SetScore(document2_score_)
+ .SetCreationTimestampMs(
+ document2_creation_timestamp_) // A random timestamp
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ doc_store->Put(DocumentProto(document1), /*num_tokens=*/5));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ doc_store->Put(DocumentProto(document2), /*num_tokens=*/7));
+
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/5)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/7)));
+}
+
+TEST_P(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "2")
+ .SetSchema("email")
+ .SetScore(document2_score_)
+ .SetCreationTimestampMs(
+ document2_creation_timestamp_) // A random timestamp
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ doc_store->Put(DocumentProto(document1), /*num_tokens=*/5));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ doc_store->Put(DocumentProto(document2), /*num_tokens=*/7));
+
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/5)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/1, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/7)));
+}
+
+TEST_P(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(/*document_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_P(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ /*document_id=*/0, fake_clock_.GetSystemTimeMilliseconds()));
+}
+
+TEST_P(DocumentStoreTest, DeleteClearsFilterCache) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
- EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
- /*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0, document1_expiration_timestamp_)));
- // FilterCache doesn't care if the document has been deleted
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // Associated entry of the deleted document is removed.
+ EXPECT_FALSE(doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+}
+
+TEST_P(DocumentStoreTest, DeleteClearsScoreCache) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_, /*num_tokens=*/4));
+
+ EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0,
+ /*document_score=*/document1_score_,
+ /*creation_timestamp_ms=*/document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ // Associated entry of the deleted document is removed.
EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
- /*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
+ doc_store->GetDocumentAssociatedScoreData(document_id),
+ IsOkAndHolds(DocumentAssociatedScoreData(kInvalidCorpusId,
+ /*document_score=*/-1,
+ /*creation_timestamp_ms=*/-1,
+ /*length_in_tokens=*/0)));
+}
+
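// Illustrative helper, an assumption drawn from the expectations above
// rather than from DocumentStore internals: deleting a document resets its
// score-cache slot to sentinel values instead of removing the entry, so a
// lookup by the old DocumentId still succeeds and returns this tombstone.
DocumentAssociatedScoreData DeletedScoreDataTombstoneSketch() {
  return DocumentAssociatedScoreData(kInvalidCorpusId,
                                     /*document_score=*/-1,
                                     /*creation_timestamp_ms=*/-1,
                                     /*length_in_tokens=*/0);
}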
+TEST_P(DocumentStoreTest, DeleteShouldPreventUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Can't report or get usage scores on the deleted document
+ ASSERT_THAT(
+ doc_store->ReportUsage(usage_report_type1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("Couldn't report usage on a nonexistent document")));
+
+ EXPECT_FALSE(doc_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+}
+
+TEST_P(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(10)
+ .SetTtlMs(100)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+
+ // Some arbitrary time before the document's creation time (10) + ttl (100)
+ fake_clock_.SetSystemTimeMilliseconds(109);
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Some arbitrary time past the document's creation time (10) + ttl (100)
+ fake_clock_.SetSystemTimeMilliseconds(200);
+
+ // Can't report or get usage scores on the expired document
+ ASSERT_THAT(
+ doc_store->ReportUsage(usage_report_type1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("Couldn't report usage on a nonexistent document")));
+
+ EXPECT_FALSE(doc_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
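// A hedged sketch of the "alive" gate both usage-score tests exercise; the
// helper name and the strictness of the comparison are assumptions, not
// the real GetAliveDocumentFilterData logic.
bool IsAliveSketch(bool deleted, int64_t expiration_timestamp_ms,
                   int64_t now_ms) {
  return !deleted && now_ms < expiration_timestamp_ms;
}
// In ExpirationShouldPreventUsageScores the document expires at creation
// (10) + ttl (100) = 110, so usage succeeds at now_ms == 109 and fails
// with NOT_FOUND at now_ms == 200.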
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace1", "1")
@@ -1213,20 +2334,24 @@ TEST_F(DocumentStoreTest,
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
-
- EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(/*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/1100)));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(doc_filter_data, Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0,
+ /*expiration_timestamp_ms=*/1100)));
}
-TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
+TEST_P(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
@@ -1235,21 +2360,28 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+
EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
+ doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
}
-TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
+TEST_P(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
DocumentProto document =
DocumentBuilder()
.SetKey("namespace1", "1")
@@ -1259,21 +2391,28 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+
EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
+ doc_filter_data,
+ Eq(DocumentFilterData(
/*namespace_id=*/0,
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
}
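// A minimal sketch of the expiration rule the three tests above pin down.
// ComputeExpirationMsSketch is a hypothetical helper, not DocumentStore's
// real code; its behavior matches the tests' expectations: a ttl of 0
// means "never expires", and an overflowing sum saturates at int64 max.
int64_t ComputeExpirationMsSketch(int64_t creation_timestamp_ms,
                                  int64_t ttl_ms) {
  if (ttl_ms == 0) {
    return std::numeric_limits<int64_t>::max();  // No TTL: never expires.
  }
  if (creation_timestamp_ms >
      std::numeric_limits<int64_t>::max() - ttl_ms) {
    return std::numeric_limits<int64_t>::max();  // Saturate on overflow.
  }
  return creation_timestamp_ms + ttl_ms;  // e.g. 1000 + 100 == 1100.
}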
-TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
+TEST_P(DocumentStoreTest, CreationTimestampShouldBePopulated) {
// Creates a document without a given creation timestamp
DocumentProto document_without_creation_timestamp =
DocumentBuilder()
@@ -1286,9 +2425,11 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
int64_t fake_real_time = 100;
fake_clock_.SetSystemTimeMilliseconds(fake_real_time);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id,
@@ -1302,7 +2443,7 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
Eq(fake_real_time));
}
-TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
+TEST_P(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
DocumentProto document1 = DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
@@ -1310,16 +2451,18 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
// With default doc score 0
.Build();
DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "email/1")
+ .SetKey("icing", "email/2")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetScore(5)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(document1));
@@ -1328,18 +2471,24 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id1),
IsOkAndHolds(DocumentAssociatedScoreData(
- /*document_score=*/0, /*creation_timestamp_ms=*/0)));
+ /*corpus_id=*/0,
+ /*document_score=*/0, /*creation_timestamp_ms=*/0,
+ /*length_in_tokens=*/0)));
EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
IsOkAndHolds(DocumentAssociatedScoreData(
- /*document_score=*/5, /*creation_timestamp_ms=*/0)));
+ /*corpus_id=*/0,
+ /*document_score=*/5, /*creation_timestamp_ms=*/0,
+ /*length_in_tokens=*/0)));
}
-TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
+TEST_P(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
@@ -1348,11 +2497,13 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
+TEST_P(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
@@ -1360,17 +2511,20 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
// Destroy the previous instance and recreate DocumentStore
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- document_store, DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
+ document_store = std::move(create_result.document_store);
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
+TEST_P(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
@@ -1380,7 +2534,25 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
IsOkAndHolds(Not(Eq(checksum))));
}
-TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
+TEST_P(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_EXPECT_OK(document_store->Put(test_document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+ UsageReport usage_report =
+ CreateUsageReport(test_document1_.namespace_(), test_document1_.uri(),
+ /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ ICING_EXPECT_OK(document_store->ReportUsage(usage_report));
+ EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+}
+
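// A hedged summary, inferred from the three checksum tests rather than
// from the implementation: the combined checksum covers the document log
// and its derived caches but excludes the usage store, so Put() changes
// the checksum while ReportUsage() leaves it unchanged.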
+TEST_P(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
const std::string schema_store_dir = schema_store_dir_ + "_custom";
DocumentId email_document_id;
@@ -1407,13 +2579,15 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -1421,18 +2595,21 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
schema_store->GetSchemaTypeId("message"));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Insert and verify a "email "document
ICING_ASSERT_OK_AND_ASSIGN(
email_document_id, document_store->Put(DocumentProto(email_document)));
EXPECT_THAT(document_store->Get(email_document_id),
IsOkAndHolds(EqualsProto(email_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
email_namespace_id = email_data.namespace_id();
email_expiration_timestamp = email_data.expiration_timestamp_ms();
@@ -1443,24 +2620,16 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
document_store->Put(DocumentProto(message_document)));
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(message_schema_type_id));
message_namespace_id = message_data.namespace_id();
message_expiration_timestamp = message_data.expiration_timestamp_ms();
} // Everything destructs and commits changes to file
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ CorruptDocStoreHeaderChecksumFile();
// Change the schema so that we don't know of the Document's type anymore.
  // Since we can't set backwards-incompatible changes, we do some file-level
@@ -1470,11 +2639,14 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -1482,16 +2654,19 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
// Successfully recover from a corrupt derived file issue. We don't fail just
// because the "message" schema type is missing
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// "email" document is fine
EXPECT_THAT(document_store->Get(email_document_id),
IsOkAndHolds(EqualsProto(email_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
  // Make sure that all the other fields are still valid/the same
EXPECT_THAT(email_data.namespace_id(), Eq(email_namespace_id));
@@ -1501,9 +2676,10 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
// "message" document has an invalid SchemaTypeId
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(-1));
  // Make sure that all the other fields are still valid/the same
EXPECT_THAT(message_data.namespace_id(), Eq(message_namespace_id));
@@ -1511,22 +2687,24 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
Eq(message_expiration_timestamp));
}
-TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
+TEST_P(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -1547,32 +2725,38 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
document_store->Put(message_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
// this should change the SchemaTypeIds.
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("email");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -1586,38 +2770,40 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
// Check that the FilterCache holds the new SchemaTypeIds
- ICING_ASSERT_OK_AND_ASSIGN(
- email_data, document_store->GetDocumentFilterData(email_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ email_data,
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
- ICING_ASSERT_OK_AND_ASSIGN(
- message_data, document_store->GetDocumentFilterData(message_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ message_data,
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
-TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
+TEST_P(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- auto property_config = type_config->add_properties();
- property_config->set_property_name("subject");
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
- property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add two documents, with and without a subject
DocumentProto email_without_subject = DocumentBuilder()
@@ -1637,9 +2823,12 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_without_subject_document_id,
document_store->Put(email_without_subject));
EXPECT_THAT(document_store->Get(email_without_subject_document_id),
@@ -1656,7 +2845,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
PropertyConfigProto::Cardinality::REQUIRED);
ICING_EXPECT_OK(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
@@ -1669,23 +2859,25 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
IsOkAndHolds(EqualsProto(email_with_subject)));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
UpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add a "email" and "message" document
DocumentProto email_document = DocumentBuilder()
@@ -1704,9 +2896,12 @@ TEST_F(DocumentStoreTest,
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
EXPECT_THAT(document_store->Get(email_document_id),
@@ -1717,13 +2912,15 @@ TEST_F(DocumentStoreTest,
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- SchemaProto new_schema;
- type_config = new_schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_EXPECT_OK(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
@@ -1736,22 +2933,24 @@ TEST_F(DocumentStoreTest,
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
+TEST_P(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -1772,33 +2971,40 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData email_data,
- document_store->GetDocumentFilterData(email_document_id));
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
document_store->Put(message_document));
- ICING_ASSERT_OK_AND_ASSIGN(
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
DocumentFilterData message_data,
- document_store->GetDocumentFilterData(message_document_id));
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
// this should change the SchemaTypeIds.
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("email");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result,
- schema_store->SetSchema(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaStore::SetSchemaResult set_schema_result,
+ schema_store->SetSchema(schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -1813,38 +3019,40 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
schema_store.get(), set_schema_result));
// Check that the FilterCache holds the new SchemaTypeIds
- ICING_ASSERT_OK_AND_ASSIGN(
- email_data, document_store->GetDocumentFilterData(email_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ email_data,
+ document_store->GetAliveDocumentFilterData(
+ email_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
- ICING_ASSERT_OK_AND_ASSIGN(
- message_data, document_store->GetDocumentFilterData(message_document_id));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ message_data,
+ document_store->GetAliveDocumentFilterData(
+ message_document_id, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
-TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
+TEST_P(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- auto property_config = type_config->add_properties();
- property_config->set_property_name("subject");
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
- property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add two documents, with and without a subject
DocumentProto email_without_subject = DocumentBuilder()
@@ -1864,9 +3072,12 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_without_subject_document_id,
document_store->Put(email_without_subject));
EXPECT_THAT(document_store->Get(email_without_subject_document_id),
@@ -1885,7 +3096,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
schema_store.get(), set_schema_result));
@@ -1899,23 +3111,25 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
IsOkAndHolds(EqualsProto(email_with_subject)));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
OptimizedUpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add a "email" and "message" document
DocumentProto email_document = DocumentBuilder()
@@ -1934,9 +3148,12 @@ TEST_F(DocumentStoreTest,
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
EXPECT_THAT(document_store->Get(email_document_id),
@@ -1947,14 +3164,16 @@ TEST_F(DocumentStoreTest,
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- SchemaProto new_schema;
- type_config = new_schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
schema_store.get(), set_schema_result));
@@ -1968,11 +3187,13 @@ TEST_F(DocumentStoreTest,
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, GetOptimizeInfo) {
+TEST_P(DocumentStoreTest, GetOptimizeInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Nothing should be optimizable yet
ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::OptimizeInfo optimize_info,
@@ -1990,8 +3211,9 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_THAT(optimize_info.estimated_optimizable_bytes, Eq(0));
// Delete a document. Now something is optimizable
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
+ ICING_EXPECT_OK(document_store->Delete(
+ test_document1_.namespace_(), test_document1_.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
ICING_ASSERT_OK_AND_ASSIGN(optimize_info, document_store->GetOptimizeInfo());
EXPECT_THAT(optimize_info.total_docs, Eq(1));
EXPECT_THAT(optimize_info.optimizable_docs, Eq(1));
@@ -2002,12 +3224,14 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
std::string optimized_dir = document_store_dir_ + "_optimize";
EXPECT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
EXPECT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir));
+ ICING_ASSERT_OK(
+ document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> optimized_document_store,
- DocumentStore::Create(&filesystem_, optimized_dir, &fake_clock_,
- schema_store_.get()));
+ create_result, CreateDocumentStore(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
+ std::unique_ptr<DocumentStore> optimized_document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(optimize_info,
optimized_document_store->GetOptimizeInfo());
@@ -2016,11 +3240,13 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_THAT(optimize_info.estimated_optimizable_bytes, Eq(0));
}
-TEST_F(DocumentStoreTest, GetAllNamespaces) {
+TEST_P(DocumentStoreTest, GetAllNamespaces) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Empty namespaces to start with
EXPECT_THAT(document_store->GetAllNamespaces(), IsEmpty());
@@ -2066,13 +3292,15 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
// After deleting namespace2_uri1, there's still namespace2_uri2, so
// "namespace2" still shows up in results
- ICING_EXPECT_OK(document_store->Delete("namespace2", "uri1"));
+ ICING_EXPECT_OK(document_store->Delete(
+ "namespace2", "uri1", fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(document_store->GetAllNamespaces(),
UnorderedElementsAre("namespace1", "namespace2", "namespace3"));
  // After deleting namespace2_uri2, there are no more documents in "namespace2"
- ICING_EXPECT_OK(document_store->Delete("namespace2", "uri2"));
+ ICING_EXPECT_OK(document_store->Delete(
+ "namespace2", "uri2", fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(document_store->GetAllNamespaces(),
UnorderedElementsAre("namespace1", "namespace3"));
@@ -2084,5 +3312,1587 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
UnorderedElementsAre("namespace1"));
}
+TEST_P(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1 and time 1.
+ UsageReport usage_report_type1_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time1));
+
+ UsageStore::UsageScores expected_scores;
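+  // Usage timestamps are stored with second granularity, so the 1000-ms
+  // report above is expected to surface as 1 second here.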
+ expected_scores.usage_type1_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 1 and time 5, time should be updated.
+ UsageReport usage_report_type1_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time5));
+
+ expected_scores.usage_type1_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 2 and time 1.
+ UsageReport usage_report_type2_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time1));
+
+ expected_scores.usage_type2_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type2_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 2 and time 5.
+ UsageReport usage_report_type2_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time5));
+
+ expected_scores.usage_type2_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type2_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 3 and time 1.
+ UsageReport usage_report_type3_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time1));
+
+ expected_scores.usage_type3_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type3_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 3 and time 5.
+ UsageReport usage_report_type3_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time5));
+
+ expected_scores.usage_type3_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type3_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
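+  // A default-constructed UsageScores starts with zeroed counts, so each
+  // report below should bump exactly one counter.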
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 2.
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2));
+
+ ++expected_scores.usage_type2_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Report usage with type 3.
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3));
+
+ ++expected_scores.usage_type3_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores, document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
+ UsageStore::UsageScores expected_scores;
+ DocumentId document_id;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+ }
+
+ CorruptDocStoreHeaderChecksumFile();
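+  // The usage store should survive regeneration of the derived files, so the
+  // scores recorded above must come back unchanged after recovery.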
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ // Usage scores should be the same.
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
+ UsageStore::UsageScores expected_scores;
+ DocumentId document_id;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id, document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(
+ document_id, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+ }
+
+ // "Corrupt" the content written in the log by adding non-checksummed data to
+ // it. This will mess up the checksum of the proto log, forcing it to rewind
+ // to the last saved point.
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ const std::string serialized_document = document.SerializeAsString();
+
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+ int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
+ filesystem_.PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(), serialized_document.size());
+
+ // Successfully recover from a data loss issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ // Usage scores should still be available.
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Update the document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId updated_document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+ // We should get a different document id.
+ ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
+
+ // Usage scores should be the same.
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores,
+ document_store->GetUsageScores(updated_document_id,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store->Put(DocumentProto(test_document1_)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store->Put(DocumentProto(test_document2_)));
+ ICING_ASSERT_OK(document_store->Delete(
+ document_id1, fake_clock_.GetSystemTimeMilliseconds()));
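+  // Deleting document_id1 leaves a gap that OptimizeInto() will compact,
+  // shifting document_id2 down by one (checked below).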
+
+ // Report usage of document 2.
+ UsageReport usage_report = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ UsageStore::UsageScores actual_scores,
+ document_store->GetUsageScores(document_id2,
+ fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+
+ // Run optimize
+ std::string optimized_dir = document_store_dir_ + "/optimize_test";
+ filesystem_.CreateDirectoryRecursively(optimized_dir.c_str());
+ ICING_ASSERT_OK(
+ document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+
+ // Get optimized document store
+ ICING_ASSERT_OK_AND_ASSIGN(
+ create_result, CreateDocumentStore(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
+ std::unique_ptr<DocumentStore> optimized_document_store =
+ std::move(create_result.document_store);
+
+ // Usage scores should be the same.
+ // The original document_id2 should have become document_id2 - 1.
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ actual_scores,
+ optimized_document_store->GetUsageScores(
+ document_id2 - 1, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
+ {
+    // Can put and get documents fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsFalse());
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(DocumentProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ }
+
+ // "Corrupt" the content written in the log by adding non-checksummed data to
+ // it. This will mess up the checksum of the proto log, forcing it to rewind
+ // to the last saved point and triggering data loss.
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ const std::string serialized_document = document.SerializeAsString();
+
+  const std::string document_log_file = absl_ports::StrCat(
+      document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+ int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
+ filesystem_.PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(), serialized_document.size());
+
+ // Successfully recover from a data loss issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
+}
+
+TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
+ int64_t corruptible_offset;
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+ {
+    // Can put and get documents fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsFalse());
+
+    // There's some space at the beginning of the file (e.g. the header,
+    // kMagic, etc.) that is necessary to initialize the FileBackedProtoLog.
+    // We can't corrupt that region, so we need to figure out the offset at
+    // which documents will be written - which is the file size after
+    // initialization.
+ corruptible_offset = filesystem_.GetFileSize(document_log_file.c_str());
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(DocumentProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ }
+
+ // "Corrupt" the persisted content written in the log. We can't recover if
+ // the persisted data was corrupted.
+ std::string corruption = "abc";
+ filesystem_.PWrite(document_log_file.c_str(),
+ /*offset=*/corruptible_offset, corruption.data(),
+ corruption.size());
+
+ {
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem_.PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(filesystem_, document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
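+    // With the header checksum valid again, initialization gets past the
+    // header and then hits the corrupted document region, which cannot be
+    // recovered from - hence DataLoss::COMPLETE below.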
+
+ WriteDocumentLogHeader(filesystem_, document_log_file, header);
+ }
+
+ // Successfully recover from a data loss issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
+}
+
+TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
+ // The directory testdata/score_cache_without_length_in_tokens/document_store
+ // contains only the scoring_cache and the document_store_header (holding the
+ // crc for the scoring_cache). If the current code is compatible with the
+ // format of the v0 scoring_cache, then an empty document store should be
+ // initialized, but the non-empty scoring_cache should be retained. The
+  // current document-associated-score-data has a new field with respect to the
+  // ones stored in testdata/score_cache_without_length_in_tokens, hence the
+ // document store's initialization requires regenerating its derived files.
+
+ // Create dst directory
+ ASSERT_THAT(filesystem_.CreateDirectory(document_store_dir_.c_str()), true);
+
+ // Get src files
+ std::string document_store_without_length_in_tokens;
+ if (IsAndroidArm() || IsIosPlatform()) {
+ document_store_without_length_in_tokens = GetTestFilePath(
+ "icing/testdata/score_cache_without_length_in_tokens/"
+ "document_store_android_ios_compatible");
+ } else if (IsAndroidX86()) {
+ document_store_without_length_in_tokens = GetTestFilePath(
+ "icing/testdata/score_cache_without_length_in_tokens/"
+ "document_store_android_x86");
+ } else {
+ document_store_without_length_in_tokens = GetTestFilePath(
+ "icing/testdata/score_cache_without_length_in_tokens/"
+ "document_store");
+ }
+ Filesystem filesystem;
+ ICING_LOG(INFO) << "Copying files "
+ << document_store_without_length_in_tokens;
+ ASSERT_THAT(
+ filesystem.CopyDirectory(document_store_without_length_in_tokens.c_str(),
+ document_store_dir_.c_str(), /*recursive=*/true),
+ true);
+
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+  // The document log uses the legacy v0 format, so a migration is needed,
+  // which will also trigger regeneration.
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT));
+ // There should be no data loss, but we still need to regenerate derived files
+  // since we migrated the document log from v0 to v1.
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
+}
+
+TEST_P(DocumentStoreTest, DocumentStoreStorageInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Add three documents.
+ DocumentProto document1 = test_document1_;
+ document1.set_namespace_("namespace.1");
+ document1.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document1));
+
+ DocumentProto document2 = test_document1_;
+ document2.set_namespace_("namespace.1");
+ document2.set_uri("uri2");
+ document2.set_creation_timestamp_ms(fake_clock_.GetSystemTimeMilliseconds());
+ document2.set_ttl_ms(100);
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ DocumentProto document3 = test_document1_;
+ document3.set_namespace_("namespace.1");
+ document3.set_uri("uri3");
+ ICING_ASSERT_OK(doc_store->Put(document3));
+
+ DocumentProto document4 = test_document1_;
+ document4.set_namespace_("namespace.2");
+ document4.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+ // Report usage with type 1 on document1
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"namespace.1", /*uri=*/"uri1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ // Report usage with type 2 on document2
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"namespace.1", /*uri=*/"uri2", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type2));
+
+ // Report usage with type 3 on document3
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"namespace.1", /*uri=*/"uri3", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type3));
+
+ // Report usage with type 1 on document4
+ usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"namespace.2", /*uri=*/"uri1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ // Delete the first doc.
+ ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri(),
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Expire the second doc.
+ fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() +
+ document2.ttl_ms() + 1);
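+  // Expiration is evaluated lazily against the clock, so advancing the fake
+  // clock past creation + ttl is enough - no explicit delete is needed.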
+
+ // Check high level info
+ DocumentStorageInfoProto storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.num_alive_documents(), Eq(2));
+ EXPECT_THAT(storage_info.num_deleted_documents(), Eq(1));
+ EXPECT_THAT(storage_info.num_expired_documents(), Eq(1));
+ EXPECT_THAT(storage_info.document_store_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_log_size(), Ge(0));
+ EXPECT_THAT(storage_info.key_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.filter_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.namespace_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.num_namespaces(), Eq(2));
+
+ // Check per-namespace info
+ EXPECT_THAT(storage_info.namespace_storage_info_size(), Eq(2));
+
+ NamespaceStorageInfoProto namespace_storage_info =
+ GetNamespaceStorageInfo(storage_info, "namespace.1");
+ EXPECT_THAT(namespace_storage_info.num_alive_documents(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type1(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type2(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type3(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type1(),
+ Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type2(),
+ Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type3(),
+ Eq(0));
+
+ namespace_storage_info = GetNamespaceStorageInfo(storage_info, "namespace.2");
+ EXPECT_THAT(namespace_storage_info.num_alive_documents(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type1(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type2(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type3(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type1(),
+ Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type2(),
+ Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type3(),
+ Eq(0));
+}
+
+TEST_P(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ // The typeid for "email" should be 0.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+ DocumentId docid = kInvalidDocumentId;
+ {
+ // Create the document store the first time and add an email document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto doc =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+
+ // Add another type to the schema before the email type.
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("alarm")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_type_config)
+ .Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ // Adding a new type should cause ids to be reassigned. Ids are assigned in
+ // order of appearance so 'alarm' should be 0 and 'email' should be 1.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1));
+
+ {
+ // Create the document store the second time and force recovery
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Ensure that the type id of the email document has been correctly updated.
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
+ EXPECT_THAT(filter_data.schema_type_id(), Eq(1));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ }
+}
+
+TEST_P(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ // The typeid for "email" should be 0.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+ DocumentId docid = kInvalidDocumentId;
+ {
+ // Create the document store the first time and add an email document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto doc =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
+
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+
+ // Add another type to the schema.
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("alarm")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_type_config)
+ .Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ // Adding a new type should cause ids to be reassigned. Ids are assigned in
+ // order of appearance so 'alarm' should be 0 and 'email' should be 1.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1));
+
+ {
+ // Create the document store the second time. Don't force recovery.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Check that the type id of the email document has not been updated.
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData filter_data,
+ doc_store->GetAliveDocumentFilterData(
+ docid, fake_clock_.GetSystemTimeMilliseconds()));
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+}
+
+TEST_P(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ DocumentProto docWithBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ DocumentProto docWithoutBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+
+ {
+ // Create the document store the first time and add two email documents: one
+ // that has the 'body' section and one that doesn't.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentId docid = kInvalidDocumentId;
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
+ docid = kInvalidDocumentId;
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+
+ // Delete the 'body' property from the 'email' type, making all pre-existing
+ // documents with the 'body' property invalid.
+ email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ schema = SchemaBuilder().AddType(email_type_config).Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ {
+ // Create the document store the second time and force recovery
+ CorruptDocStoreHeaderChecksumFile();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+}
+
+TEST_P(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ DocumentProto docWithBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ DocumentProto docWithoutBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+
+ {
+ // Create the document store the first time and add two email documents: one
+ // that has the 'body' section and one that doesn't.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentId docid = kInvalidDocumentId;
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
+ docid = kInvalidDocumentId;
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+
+ // Delete the 'body' property from the 'email' type, making all pre-existing
+ // documents with the 'body' property invalid.
+ email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ schema = SchemaBuilder().AddType(email_type_config).Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ {
+ // Corrupt the document store header checksum so that we will perform
+ // recovery, but without revalidation.
+ CorruptDocStoreHeaderChecksumFile();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+}
+
+TEST_P(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
+ // Set up schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ std::string schema_store_dir = schema_store_dir_ + "_migrate";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // Create dst directory that we'll initialize the DocumentStore over.
+ std::string document_store_dir = document_store_dir_ + "_migrate";
+ ASSERT_THAT(
+ filesystem_.DeleteDirectoryRecursively(document_store_dir.c_str()), true);
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()), true);
+
+ // Copy the testdata files into our DocumentStore directory
+ std::string document_store_without_portable_log;
+ if (IsAndroidX86()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_x86/document_dir");
+ } else if (IsAndroidArm()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_arm/document_dir");
+ } else if (IsIosPlatform()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_ios/document_dir");
+ } else {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_linux/document_dir");
+ }
+
+ ASSERT_TRUE(filesystem_.CopyDirectory(
+ document_store_without_portable_log.c_str(), document_store_dir.c_str(),
+ /*recursive=*/true));
+
+ // Initialize the DocumentStore over our copied files.
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+          GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+          GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ // Check that we didn't lose anything. A migration also doesn't technically
+ // count as data loss, but we still have to regenerate derived files after
+ // migration.
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
+ EXPECT_EQ(initialize_stats.document_store_recovery_cause(),
+ InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
+
+  // Documents 1 and 3 were put normally, and document 2 was deleted in our
+  // testdata files.
+  //
+  // Check by (namespace, uri).
+ EXPECT_THAT(document_store->Get(document1.namespace_(), document1.uri()),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(document_store->Get(document2.namespace_(), document2.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(document3.namespace_(), document3.uri()),
+ IsOkAndHolds(EqualsProto(document3)));
+
+ // Check by document_id
+ EXPECT_THAT(document_store->Get(/*document_id=*/0),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(document_store->Get(/*document_id=*/1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(/*document_id=*/2),
+ IsOkAndHolds(EqualsProto(document3)));
+}
+
+TEST_P(DocumentStoreTest, GetDebugInfo) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ std::string schema_store_dir = schema_store_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "aa bb cc")
+ .AddStringProperty("body", "dd ee")
+ .SetCreationTimestampMs(1)
+ .Build();
+ ICING_ASSERT_OK(document_store->Put(document1, 5));
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "aa bb")
+ .AddStringProperty("body", "cc")
+ .SetCreationTimestampMs(1)
+ .Build();
+ ICING_ASSERT_OK(document_store->Put(document2, 3));
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "email/3")
+ .SetSchema("email")
+ .AddStringProperty("subject", "aa")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(1)
+ .Build();
+ ICING_ASSERT_OK(document_store->Put(document3, 1));
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace1", "person/1")
+ .SetSchema("person")
+ .AddStringProperty("name", "test test")
+ .SetCreationTimestampMs(1)
+ .Build();
+ ICING_ASSERT_OK(document_store->Put(document4, 2));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out1,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
+ EXPECT_THAT(out1.crc(), Gt(0));
+ EXPECT_THAT(out1.document_storage_info().num_alive_documents(), Eq(4));
+ EXPECT_THAT(out1.document_storage_info().num_deleted_documents(), Eq(0));
+ EXPECT_THAT(out1.document_storage_info().num_expired_documents(), Eq(0));
+
+ DocumentDebugInfoProto::CorpusInfo info1, info2, info3;
+ info1.set_namespace_("namespace1");
+ info1.set_schema("email");
+ info1.set_total_documents(1); // document1
+ info1.set_total_token(5);
+
+ info2.set_namespace_("namespace2");
+ info2.set_schema("email");
+ info2.set_total_documents(2); // document2 and document3
+ info2.set_total_token(4); // 3 + 1
+
+ info3.set_namespace_("namespace1");
+ info3.set_schema("person");
+ info3.set_total_documents(1); // document4
+ info3.set_total_token(2);
+
+ EXPECT_THAT(out1.corpus_info(),
+ UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2),
+ EqualsProto(info3)));
+
+ // Delete document3.
+ ICING_ASSERT_OK(document_store->Delete(
+ "namespace2", "email/3", fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out2,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
+ EXPECT_THAT(out2.crc(), Gt(0));
+ EXPECT_THAT(out2.crc(), Not(Eq(out1.crc())));
+ EXPECT_THAT(out2.document_storage_info().num_alive_documents(), Eq(3));
+ EXPECT_THAT(out2.document_storage_info().num_deleted_documents(), Eq(1));
+ EXPECT_THAT(out2.document_storage_info().num_expired_documents(), Eq(0));
+ info2.set_total_documents(1); // document2
+ info2.set_total_token(3);
+ EXPECT_THAT(out2.corpus_info(),
+ UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2),
+ EqualsProto(info3)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out3,
+ document_store->GetDebugInfo(DebugInfoVerbosity::BASIC));
+ EXPECT_THAT(out3.corpus_info(), IsEmpty());
+}
+
+TEST_P(DocumentStoreTest, GetDebugInfoWithoutSchema) {
+ std::string schema_store_dir = schema_store_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
+ EXPECT_THAT(out.crc(), Gt(0));
+ EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0));
+ EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0));
+ EXPECT_THAT(out.document_storage_info().num_expired_documents(), Eq(0));
+ EXPECT_THAT(out.corpus_info(), IsEmpty());
+}
+
+TEST_P(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentDebugInfoProto out,
+ document_store->GetDebugInfo(DebugInfoVerbosity::DETAILED));
+ EXPECT_THAT(out.crc(), Gt(0));
+ EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0));
+ EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0));
+ EXPECT_THAT(out.document_storage_info().num_expired_documents(), Eq(0));
+ EXPECT_THAT(out.corpus_info(), IsEmpty());
+}
+
+TEST_P(DocumentStoreTest, SwitchKeyMapperTypeShouldRegenerateDerivedFiles) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ DocumentId document_id1;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(document_id1, doc_store->Put(test_document1_));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+
+  // Switch the key mapper type. We should get an I/O error, and derived files
+  // should be regenerated.
+ {
+ bool switch_key_mapper_flag = !GetParam().use_persistent_hash_map;
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ /*use_persistent_hash_map=*/switch_key_mapper_flag,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(doc_store->GetDocumentId(test_document1_.namespace_(),
+ test_document1_.uri()),
+ IsOkAndHolds(document_id1));
+
+ if (switch_key_mapper_flag) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+}
+
+TEST_P(DocumentStoreTest, SameKeyMapperTypeShouldNotRegenerateDerivedFiles) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ DocumentId document_id1;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(document_id1, doc_store->Put(test_document1_));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+
+ // Use the same key mapper type. Derived files should not be regenerated.
+ {
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(doc_store->GetDocumentId(test_document1_.namespace_(),
+ test_document1_.uri()),
+ IsOkAndHolds(document_id1));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+}
+
+TEST_P(DocumentStoreTest, GetDocumentIdByNamespaceFingerprintIdentifier) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId namespace_id,
+ doc_store->GetNamespaceId(test_document1_.namespace_()));
+ NamespaceFingerprintIdentifier ns_fingerprint(
+ namespace_id,
+ /*target_str=*/test_document1_.uri());
+ if (GetParam().namespace_id_fingerprint) {
+ EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint),
+ IsOkAndHolds(document_id));
+
+ NamespaceFingerprintIdentifier non_existing_ns_fingerprint(
+ namespace_id + 1, /*target_str=*/test_document1_.uri());
+ EXPECT_THAT(doc_store->GetDocumentId(non_existing_ns_fingerprint),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ } else {
+ EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ DocumentStoreTest, DocumentStoreTest,
+ testing::Values(
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true)));
+
+} // namespace
+
} // namespace lib
} // namespace icing
diff --git a/icing/store/dynamic-trie-key-mapper.h b/icing/store/dynamic-trie-key-mapper.h
new file mode 100644
index 0000000..63e8488
--- /dev/null
+++ b/icing/store/dynamic-trie-key-mapper.h
@@ -0,0 +1,334 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
+#define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// File-backed mapping between a string key and a trivially copyable value
+// type.
+//
+// DynamicTrieKeyMapper is thread-compatible.
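+//
+// Example usage (a minimal sketch; filesystem, base_dir, document_id, and the
+// 1 MiB budget are illustrative placeholders, not requirements of the API):
+//
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<DynamicTrieKeyMapper<DocumentId>> mapper,
+//       DynamicTrieKeyMapper<DocumentId>::Create(
+//           filesystem, base_dir, /*maximum_size_bytes=*/1024 * 1024));
+//   ICING_RETURN_IF_ERROR(mapper->Put("default-google.com", document_id));
+//   ICING_ASSIGN_OR_RETURN(DocumentId id, mapper->Get("default-google.com"));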
+template <typename T, typename Formatter = absl_ports::DefaultFormatter>
+class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> {
+ public:
+ // Returns an initialized instance of DynamicTrieKeyMapper that can
+ // immediately handle read/write operations.
+ // Returns any encountered IO errors.
+ //
+  // base_dir : Base directory used to save all the files required to persist
+  //            DynamicTrieKeyMapper. If this base_dir was previously used to
+  //            create a DynamicTrieKeyMapper, the existing data will be
+  //            loaded. Otherwise, an empty DynamicTrieKeyMapper will be
+  //            created.
+ // maximum_size_bytes : The maximum allowable size of the key mapper storage.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
+ Create(const Filesystem& filesystem, std::string_view base_dir,
+ int maximum_size_bytes);
+
+ // Deletes all the files associated with the DynamicTrieKeyMapper.
+ //
+ // base_dir : Base directory used to save all the files required to persist
+ // DynamicTrieKeyMapper. Should be the same as passed into
+ // Create().
+ //
+ // Returns
+ // OK on success
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Delete(const Filesystem& filesystem,
+ std::string_view base_dir);
+
+ ~DynamicTrieKeyMapper() override = default;
+
+ libtextclassifier3::Status Put(std::string_view key, T value) override;
+
+ libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
+ T next_value) override;
+
+ libtextclassifier3::StatusOr<T> Get(std::string_view key) const override;
+
+ bool Delete(std::string_view key) override;
+
+ std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
+ const override;
+
+ int32_t num_keys() const override { return trie_.size(); }
+
+ libtextclassifier3::Status PersistToDisk() override;
+
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override;
+
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override;
+
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum() override;
+
+ private:
+ class Iterator : public KeyMapper<T, Formatter>::Iterator {
+ public:
+ explicit Iterator(const IcingDynamicTrie& trie)
+ : itr_(trie, /*prefix=*/""), start_(true) {}
+
+ ~Iterator() override = default;
+
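+    // The underlying trie iterator already points at the first entry (if any)
+    // when constructed, so the first call to Advance() only reports validity;
+    // subsequent calls actually move the iterator forward.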
+ bool Advance() override {
+ if (start_) {
+ start_ = false;
+ return itr_.IsValid();
+ }
+ return itr_.Advance();
+ }
+
+ std::string_view GetKey() const override {
+ const char* key = itr_.GetKey();
+ return std::string_view(key);
+ }
+
+ T GetValue() const override {
+ T value;
+ memcpy(&value, itr_.GetValue(), sizeof(T));
+ return value;
+ }
+
+ private:
+ IcingDynamicTrie::Iterator itr_;
+
+ // TODO(b/241784804): remove this flag after changing IcingDynamicTrie to
+ // follow the common iterator pattern in our codebase.
+ bool start_;
+ };
+
+ static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir";
+ static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper";
+
+ // Use DynamicTrieKeyMapper::Create() to instantiate.
+ explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir);
+
+  // Loads any existing DynamicTrieKeyMapper data from disk, or creates a new
+  // instance of DynamicTrieKeyMapper on disk, and gets ready to process
+  // read/write operations.
+ //
+ // Returns any encountered IO errors.
+ libtextclassifier3::Status Initialize(int maximum_size_bytes);
+
+ const std::string file_prefix_;
+
+  // TODO(adorokhine) Filesystem is a forked class that's available in both
+  // the icing and icing::lib namespaces. We will need icing::Filesystem in
+  // order to use IcingDynamicTrie. The Filesystem class should be fully
+  // refactored to have a single definition across both namespaces. Such a
+  // class should use icing (and general google3) coding conventions and
+  // behave like a proper C++ class.
+ const IcingFilesystem icing_filesystem_;
+ IcingDynamicTrie trie_;
+
+ static_assert(std::is_trivially_copyable<T>::value,
+ "T must be trivially copyable");
+};
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<
+ std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
+DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem,
+ std::string_view base_dir,
+ int maximum_size_bytes) {
+ // We create a subdirectory since the trie creates and stores multiple files.
+ // This makes it easier to isolate the trie files away from other files that
+ // could potentially be in the same base_dir, and makes it easier to delete.
+ const std::string key_mapper_dir =
+ absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
+ if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir));
+ }
+ auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>(
+ new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir));
+ ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
+ return mapper;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete(
+ const Filesystem& filesystem, std::string_view base_dir) {
+ std::string key_mapper_dir =
+ absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
+ if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper(
+ std::string_view key_mapper_dir)
+ : file_prefix_(
+ absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)),
+ trie_(file_prefix_,
+ IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
+ &icing_filesystem_) {}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize(
+ int maximum_size_bytes) {
+ IcingDynamicTrie::Options options;
+ // Divide the max space between the three internal arrays: nodes, nexts and
+ // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
+ // MaxSuffixesSize is in units of bytes.
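+  // Illustrative arithmetic (assuming a hypothetical 16-byte Node): with
+  // maximum_size_bytes = 3 MiB, max_nodes and max_nexts would each be
+  // 3 MiB / 48 B = 65536 entries, and max_suffixes_size would be
+  // 16 B * 65536 = 1 MiB.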
+ options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
+ options.max_nexts = options.max_nodes;
+ options.max_suffixes_size =
+ sizeof(IcingDynamicTrie::Node) * options.max_nodes;
+ options.value_size = sizeof(T);
+
+ if (!trie_.CreateIfNotExist(options)) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create DynamicTrieKeyMapper file: ", file_prefix_));
+ }
+ if (!trie_.Init()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to init DynamicTrieKeyMapper file: ", file_prefix_));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut(
+ std::string_view key, T next_value) {
+ std::string string_key(key);
+ uint32_t value_index;
+ libtextclassifier3::Status status =
+ trie_.Insert(string_key.c_str(), &next_value, &value_index,
+ /*replace=*/false);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Unable to insert key " << string_key
+ << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
+ << status.error_message();
+ return status;
+ }
+ // This memory address could be unaligned since we're just grabbing the value
+ // from somewhere in the trie's suffix array. The suffix array is filled with
+ // chars, so the address might not be aligned to T values.
+ const T* unaligned_value =
+ static_cast<const T*>(trie_.GetValueAtIndex(value_index));
+
+  // memcpy the value to ensure that the value returned here lives at a
+  // T-aligned address.
+ T aligned_value;
+ memcpy(&aligned_value, unaligned_value, sizeof(T));
+ return aligned_value;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put(
+ std::string_view key, T value) {
+ std::string string_key(key);
+ libtextclassifier3::Status status = trie_.Insert(string_key.c_str(), &value);
+ if (!status.ok()) {
+ ICING_LOG(DBG) << "Unable to insert key " << string_key
+ << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
+ << status.error_message();
+ return status;
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get(
+ std::string_view key) const {
+ std::string string_key(key);
+ T value;
+ if (!trie_.Find(string_key.c_str(), &value)) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Key not found ", Formatter()(string_key),
+ " in DynamicTrieKeyMapper ", file_prefix_, "."));
+ }
+ return value;
+}
+
+template <typename T, typename Formatter>
+bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) {
+ return trie_.Delete(key);
+}
+
+template <typename T, typename Formatter>
+std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator>
+DynamicTrieKeyMapper<T, Formatter>::GetIterator() const {
+ return std::make_unique<DynamicTrieKeyMapper<T, Formatter>::Iterator>(trie_);
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() {
+ if (!trie_.Sync()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to sync DynamicTrieKeyMapper file: ", file_prefix_));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<int64_t>
+DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const {
+ int64_t size = trie_.GetDiskUsage();
+ if (size == IcingFilesystem::kBadFileSize || size < 0) {
+ return absl_ports::InternalError("Failed to get disk usage of key mapper");
+ }
+ return size;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<int64_t>
+DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const {
+ int64_t size = trie_.GetElementsSize();
+ if (size == IcingFilesystem::kBadFileSize || size < 0) {
+ return absl_ports::InternalError(
+ "Failed to get disk usage of elements in the key mapper");
+ }
+ return size;
+}
+
+template <typename T, typename Formatter>
+libtextclassifier3::StatusOr<Crc32>
+DynamicTrieKeyMapper<T, Formatter>::ComputeChecksum() {
+ return Crc32(trie_.UpdateCrc());
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
diff --git a/icing/store/dynamic-trie-key-mapper_test.cc b/icing/store/dynamic-trie-key-mapper_test.cc
new file mode 100644
index 0000000..fd56170
--- /dev/null
+++ b/icing/store/dynamic-trie-key-mapper_test.cc
@@ -0,0 +1,67 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/dynamic-trie-key-mapper.h"
+
+#include <limits>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+constexpr int kMaxDynamicTrieKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
+
+class DynamicTrieKeyMapperTest : public testing::Test {
+ protected:
+ void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ std::string base_dir_;
+ Filesystem filesystem_;
+};
+
+TEST_F(DynamicTrieKeyMapperTest, InvalidBaseDir) {
+ EXPECT_THAT(DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, "/dev/null", kMaxDynamicTrieKeyMapperSize),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(DynamicTrieKeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) {
+ EXPECT_THAT(
+ DynamicTrieKeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(DynamicTrieKeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) {
+ EXPECT_THAT(DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, base_dir_, std::numeric_limits<int>::max()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/key-mapper.h b/icing/store/key-mapper.h
index 4571df2..2767da8 100644
--- a/icing/store/key-mapper.h
+++ b/icing/store/key-mapper.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Google LLC
+// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,78 +17,83 @@
#include <cstdint>
#include <cstring>
-#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
+#include <unordered_map>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/file/filesystem.h"
-#include "icing/legacy/index/icing-dynamic-trie.h"
-#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/util/crc32.h"
-#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-// File-backed mapping between the string key and a trivially copyable value
-// type.
+// An interface for file-backed mapping between a string key and a trivially
+// copyable value type.
//
-// KeyMapper is thread-compatible
-template <typename T>
+// Implementations of KeyMapper should be thread-compatible.
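+//
+// Known implementations in this codebase include DynamicTrieKeyMapper and
+// PersistentHashMapKeyMapper.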
+template <typename T, typename Formatter = absl_ports::DefaultFormatter>
class KeyMapper {
public:
- // Returns an initialized instance of KeyMapper that can immediately handle
- // read/write operations.
- // Returns any encountered IO errors.
- //
- // base_dir : Base directory used to save all the files required to persist
- // KeyMapper. If this base_dir was previously used to create a
- // KeyMapper, then this existing data would be loaded. Otherwise,
- // an empty KeyMapper would be created.
- // maximum_size_bytes : The maximum allowable size of the key mapper storage.
- static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create(
- const Filesystem& filesystem, std::string_view base_dir,
- int maximum_size_bytes);
+ class Iterator {
+ public:
+ virtual ~Iterator() = default;
- // Deletes all the files associated with the KeyMapper. Returns success or any
- // encountered IO errors
- //
- // base_dir : Base directory used to save all the files required to persist
- // KeyMapper. Should be the same as passed into Create().
- static libtextclassifier3::Status Delete(const Filesystem& filesystem,
- std::string_view base_dir);
+ // Advance to the next entry.
+ //
+ // Returns:
+ // True on success, otherwise false.
+ virtual bool Advance() = 0;
+
+ // Get the key.
+ //
+    // REQUIRES: The preceding call to Advance() returned true.
+ virtual std::string_view GetKey() const = 0;
- ~KeyMapper() = default;
+ // Get the value.
+ //
+    // REQUIRES: The preceding call to Advance() returned true.
+ virtual T GetValue() const = 0;
+ };
+
+ virtual ~KeyMapper() = default;
// Inserts/Updates value for key.
// Returns any encountered IO errors.
//
// NOTE: Put() doesn't automatically flush changes to disk and relies on
// either explicit calls to PersistToDisk() or a clean shutdown of the class.
- libtextclassifier3::Status Put(std::string_view key, T value);
+ virtual libtextclassifier3::Status Put(std::string_view key, T value) = 0;
// Finds the current value for key and returns it. If key is not present, it
// is inserted with next_value and next_value is returned.
//
// Returns any IO errors that may occur during Put.
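+  //
+  // For example: if "foo" is absent, GetOrPut("foo", 7) stores 7 and returns
+  // it; a later GetOrPut("foo", 9) returns 7 and leaves the stored value
+  // unchanged.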
- libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, T next_value);
+ virtual libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
+ T next_value) = 0;
// Returns the value corresponding to the key.
//
// Returns NOT_FOUND error if the key was missing.
// Returns any encountered IO errors.
- libtextclassifier3::StatusOr<T> Get(std::string_view key) const;
+ virtual libtextclassifier3::StatusOr<T> Get(std::string_view key) const = 0;
+
+ // Deletes data related to the given key. Returns true on success.
+ virtual bool Delete(std::string_view key) = 0;
- // Returns a map of values to keys. Empty map if the mapper is empty.
- std::unordered_map<T, std::string> GetValuesToKeys() const;
+ // Returns an iterator of the key mapper.
+ //
+ // Example usage:
+ // auto itr = key_mapper->GetIterator();
+ // while (itr->Advance()) {
+ // std::cout << itr->GetKey() << " " << itr->GetValue() << std::endl;
+ // }
+ virtual std::unique_ptr<Iterator> GetIterator() const = 0;
// Count of unique keys stored in the KeyMapper.
- int32_t num_keys() const { return trie_.size(); }
+ virtual int32_t num_keys() const = 0;
// Syncs all the changes made to the KeyMapper to disk.
// Returns any encountered IO errors.
@@ -100,7 +105,7 @@ class KeyMapper {
// Returns:
// OK on success
// INTERNAL on I/O error
- libtextclassifier3::Status PersistToDisk();
+ virtual libtextclassifier3::Status PersistToDisk() = 0;
// Calculates and returns the disk usage in bytes. Rounds up to the nearest
// block size.
@@ -108,7 +113,7 @@ class KeyMapper {
// Returns:
// Disk usage on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+ virtual libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const = 0;
// Returns the size of the elements held in the key mapper. This excludes the
// size of any internal metadata of the key mapper, e.g. the key mapper's
@@ -117,192 +122,16 @@ class KeyMapper {
// Returns:
// File size on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ virtual libtextclassifier3::StatusOr<int64_t> GetElementsSize() const = 0;
// Computes and returns the checksum of the header and contents.
- Crc32 ComputeChecksum();
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeChecksum() = 0;
private:
- static constexpr char kKeyMapperDir[] = "key_mapper_dir";
- static constexpr char kKeyMapperPrefix[] = "key_mapper";
-
- // Use KeyMapper::Create() to instantiate.
- explicit KeyMapper(std::string_view key_mapper_dir);
-
- // Load any existing KeyMapper data from disk, or creates a new instance
- // of KeyMapper on disk and gets ready to process read/write operations.
- //
- // Returns any encountered IO errors.
- libtextclassifier3::Status Initialize(int maximum_size_bytes);
-
- const std::string file_prefix_;
-
- // TODO(adorokhine) Filesystem is a forked class that's available both in
- // icing and icing namespaces. We will need icing::Filesystem in order
- // to use IcingDynamicTrie. Filesystem class should be fully refactored
- // to have a single definition across both namespaces. Such a class should
- // use icing (and general google3) coding conventions and behave like
- // a proper C++ class.
- const IcingFilesystem icing_filesystem_;
- IcingDynamicTrie trie_;
-
static_assert(std::is_trivially_copyable<T>::value,
"T must be trivially copyable");
};
-template <typename T>
-libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>>
-KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir,
- int maximum_size_bytes) {
- // We create a subdirectory since the trie creates and stores multiple files.
- // This makes it easier to isolate the trie files away from other files that
- // could potentially be in the same base_dir, and makes it easier to delete.
- const std::string key_mapper_dir =
- absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to create KeyMapper directory: ", key_mapper_dir));
- }
- auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir));
- ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
- return mapper;
-}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem,
- std::string_view base_dir) {
- std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to delete KeyMapper directory: ", key_mapper_dir));
- }
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir)
- : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)),
- trie_(file_prefix_,
- IcingDynamicTrie::RuntimeOptions().set_storage_policy(
- IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
- &icing_filesystem_) {}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) {
- IcingDynamicTrie::Options options;
- // Divide the max space between the three internal arrays: nodes, nexts and
- // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
- // MaxSuffixesSize is in units of bytes.
- options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
- options.max_nexts = options.max_nodes;
- options.max_suffixes_size =
- sizeof(IcingDynamicTrie::Node) * options.max_nodes;
- options.value_size = sizeof(T);
-
- if (!trie_.CreateIfNotExist(options)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to create KeyMapper file: ", file_prefix_));
- }
- if (!trie_.Init()) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to init KeyMapper file: ", file_prefix_));
- }
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key,
- T next_value) {
- std::string string_key(key);
- uint32_t value_index;
- if (!trie_.Insert(string_key.c_str(), &next_value, &value_index,
- /*replace=*/false)) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
- }
- // This memory address could be unaligned since we're just grabbing the value
- // from somewhere in the trie's suffix array. The suffix array is filled with
- // chars, so the address might not be aligned to T values.
- const T* unaligned_value =
- static_cast<const T*>(trie_.GetValueAtIndex(value_index));
-
- // memcpy the value to ensure that the returned value here is in a T-aligned
- // address
- T aligned_value;
- memcpy(&aligned_value, unaligned_value, sizeof(T));
- return aligned_value;
-}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) {
- std::string string_key(key);
- if (!trie_.Insert(string_key.c_str(), &value)) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
- }
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const {
- std::string string_key(key);
- T value;
- if (!trie_.Find(string_key.c_str(), &value)) {
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "Key not found ", key, " in KeyMapper ", file_prefix_, "."));
- }
- return value;
-}
-
-template <typename T>
-std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const {
- std::unordered_map<T, std::string> values_to_keys;
- for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid();
- itr.Advance()) {
- if (itr.IsValid()) {
- T value;
- memcpy(&value, itr.GetValue(), sizeof(T));
- values_to_keys.insert({value, itr.GetKey()});
- }
- }
-
- return values_to_keys;
-}
-
-template <typename T>
-libtextclassifier3::Status KeyMapper<T>::PersistToDisk() {
- if (!trie_.Sync()) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_));
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const {
- int64_t size = trie_.GetDiskUsage();
- if (size == IcingFilesystem::kBadFileSize || size < 0) {
- return absl_ports::InternalError("Failed to get disk usage of key mapper");
- }
- return size;
-}
-
-template <typename T>
-libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const {
- int64_t size = trie_.GetElementsSize();
- if (size == IcingFilesystem::kBadFileSize || size < 0) {
- return absl_ports::InternalError(
- "Failed to get disk usage of elements in the key mapper");
- }
- return size;
-}
-
-template <typename T>
-Crc32 KeyMapper<T>::ComputeChecksum() {
- return Crc32(trie_.UpdateCrc());
-}
-
} // namespace lib
} // namespace icing
diff --git a/icing/store/key-mapper_benchmark.cc b/icing/store/key-mapper_benchmark.cc
new file mode 100644
index 0000000..c25fe30
--- /dev/null
+++ b/icing/store/key-mapper_benchmark.cc
@@ -0,0 +1,323 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <random>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsTrue;
+using ::testing::Not;
+
+class KeyMapperBenchmark {
+ public:
+ static constexpr int kKeyLength = 20;
+
+ explicit KeyMapperBenchmark()
+ : clock(std::make_unique<Clock>()),
+ base_dir(GetTestTempDir() + "/key_mapper_benchmark"),
+ random_engine(/*seed=*/12345) {}
+
+ std::string GenerateUniqueRandomKeyValuePair(int val,
+ std::string_view prefix = "") {
+ std::string rand_str = absl_ports::StrCat(
+ prefix, RandomString(kAlNumAlphabet, kKeyLength, &random_engine));
+ while (random_kvps_map.find(rand_str) != random_kvps_map.end()) {
+      rand_str = absl_ports::StrCat(
+          prefix, RandomString(kAlNumAlphabet, kKeyLength, &random_engine));
+ }
+ std::pair<std::string, int> entry(rand_str, val);
+ random_kvps.push_back(entry);
+ random_kvps_map.insert(entry);
+ return rand_str;
+ }
+
+ template <typename UnknownKeyMapperType>
+ libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<int>>> CreateKeyMapper(
+ int max_num_entries) {
+ return absl_ports::InvalidArgumentError("Unknown type");
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<int>>>
+ CreateKeyMapper<DynamicTrieKeyMapper<int>>(int max_num_entries) {
+ return DynamicTrieKeyMapper<int>::Create(
+ filesystem, base_dir,
+ /*maximum_size_bytes=*/128 * 1024 * 1024);
+ }
+
+ template <>
+ libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<int>>>
+ CreateKeyMapper<PersistentHashMapKeyMapper<int>>(int max_num_entries) {
+ std::string working_path =
+ absl_ports::StrCat(base_dir, "/", "key_mapper_dir");
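+    // The average_kv_byte_size hint below presumes each entry stores a
+    // kKeyLength-char key plus a one-byte terminator and a sizeof(int) value.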
+ return PersistentHashMapKeyMapper<int>::Create(
+ filesystem, std::move(working_path), /*pre_mapping_fbv=*/true,
+ max_num_entries, /*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int),
+ /*max_load_factor_percent=*/100);
+ }
+
+ std::unique_ptr<Clock> clock;
+
+ Filesystem filesystem;
+ std::string base_dir;
+
+ std::default_random_engine random_engine;
+ std::vector<std::pair<std::string, int>> random_kvps;
+ std::unordered_map<std::string, int> random_kvps_map;
+};
+
+// Benchmark the total time of putting num_keys (specified by Arg) unique random
+// key value pairs.
+template <typename KeyMapperType>
+void BM_PutMany(benchmark::State& state) {
+ int num_keys = state.range(0);
+
+ KeyMapperBenchmark benchmark;
+ for (int i = 0; i < num_keys; ++i) {
+ benchmark.GenerateUniqueRandomKeyValuePair(i);
+ }
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<int>> key_mapper,
+ benchmark.CreateKeyMapper<KeyMapperType>(num_keys));
+ ASSERT_THAT(key_mapper->num_keys(), Eq(0));
+ state.ResumeTiming();
+
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(key_mapper->Put(benchmark.random_kvps[i].first,
+ benchmark.random_kvps[i].second));
+ }
+
+    // Explicitly call PersistToDisk.
+ ICING_ASSERT_OK(key_mapper->PersistToDisk());
+
+ state.PauseTiming();
+ ASSERT_THAT(key_mapper->num_keys(), Eq(num_keys));
+    // The destructor of IcingDynamicTrie doesn't implicitly call
+    // PersistToDisk, while PersistentHashMap's does. Thus, we reset the
+    // unique pointer to invoke the destructor inside the paused-timing block,
+    // so PersistToDisk is included in the benchmark only once in either case.
+ key_mapper.reset();
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_PutMany<DynamicTrieKeyMapper<int>>)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+BENCHMARK(BM_PutMany<PersistentHashMapKeyMapper<int>>)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+// Benchmark the average time of putting 1 unique random key value pair. The
+// result is affected by the number of iterations, so use
+// --benchmark_max_iters=k and --benchmark_min_iters=k to pin the iteration
+// count.
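+//
+// For example, to pin the count to 1000 iterations (the binary name is
+// illustrative):
+//   key-mapper_benchmark --benchmark_min_iters=1000 --benchmark_max_iters=1000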
+template <typename KeyMapperType>
+void BM_Put(benchmark::State& state) {
+ KeyMapperBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
+
+  // The overhead of state.PauseTiming is large enough to skew the benchmark
+  // result, so pre-generate enough kvps to avoid calling state.PauseTiming
+  // around GenerateUniqueRandomKeyValuePair too many times inside the
+  // benchmark for-loop.
+  constexpr int kMaxPregenKvps = 1 << 22;
+  for (int i = 0; i < kMaxPregenKvps; ++i) {
+ benchmark.GenerateUniqueRandomKeyValuePair(i);
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<int>> key_mapper,
+ benchmark.CreateKeyMapper<KeyMapperType>(/*max_num_entries=*/1 << 22));
+ ASSERT_THAT(key_mapper->num_keys(), Eq(0));
+
+ int cnt = 0;
+ for (auto _ : state) {
+    if (cnt >= kMaxPregenKvps) {
+ state.PauseTiming();
+ benchmark.GenerateUniqueRandomKeyValuePair(cnt);
+ state.ResumeTiming();
+ }
+
+ ICING_ASSERT_OK(key_mapper->Put(benchmark.random_kvps[cnt].first,
+ benchmark.random_kvps[cnt].second));
+ ++cnt;
+ }
+}
+BENCHMARK(BM_Put<DynamicTrieKeyMapper<int>>);
+BENCHMARK(BM_Put<PersistentHashMapKeyMapper<int>>);
+
+// Benchmark the average time of getting 1 existing key value pair from the key
+// mapper with size num_keys (specified by Arg).
+template <typename KeyMapperType>
+void BM_Get(benchmark::State& state) {
+ int num_keys = state.range(0);
+
+ KeyMapperBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
+
+ // Create a key mapper with num_keys entries.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<int>> key_mapper,
+ benchmark.CreateKeyMapper<KeyMapperType>(num_keys));
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(
+ key_mapper->Put(benchmark.GenerateUniqueRandomKeyValuePair(i), i));
+ }
+ ASSERT_THAT(key_mapper->num_keys(), Eq(num_keys));
+
+ std::uniform_int_distribution<> distrib(0, num_keys - 1);
+ std::default_random_engine e(/*seed=*/12345);
+ for (auto _ : state) {
+ int idx = distrib(e);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ int val, key_mapper->Get(benchmark.random_kvps[idx].first));
+ ASSERT_THAT(val, Eq(benchmark.random_kvps[idx].second));
+ }
+}
+BENCHMARK(BM_Get<DynamicTrieKeyMapper<int>>)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+BENCHMARK(BM_Get<PersistentHashMapKeyMapper<int>>)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+// Benchmark the total time of iterating through all key value pairs of the key
+// mapper with size num_keys (specified by Arg).
+template <typename KeyMapperType>
+void BM_Iterator(benchmark::State& state) {
+ int num_keys = state.range(0);
+
+ KeyMapperBenchmark benchmark;
+ benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
+ DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
+
+ // Create a key mapper with num_keys entries.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<int>> key_mapper,
+ benchmark.CreateKeyMapper<KeyMapperType>(num_keys));
+ for (int i = 0; i < num_keys; ++i) {
+ ICING_ASSERT_OK(
+ key_mapper->Put(benchmark.GenerateUniqueRandomKeyValuePair(i), i));
+ }
+ ASSERT_THAT(key_mapper->num_keys(), Eq(num_keys));
+
+ for (auto _ : state) {
+ auto iter = key_mapper->GetIterator();
+ int cnt = 0;
+ while (iter->Advance()) {
+ ++cnt;
+ std::string key(iter->GetKey());
+ int value = iter->GetValue();
+ auto it = benchmark.random_kvps_map.find(key);
+ ASSERT_THAT(it, Not(Eq(benchmark.random_kvps_map.end())));
+ ASSERT_THAT(it->second, Eq(value));
+ }
+ ASSERT_THAT(cnt, Eq(num_keys));
+ }
+}
+BENCHMARK(BM_Iterator<DynamicTrieKeyMapper<int>>)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+BENCHMARK(BM_Iterator<PersistentHashMapKeyMapper<int>>)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/key-mapper_test.cc b/icing/store/key-mapper_test.cc
index 4e3dd8a..fa7d1e8 100644
--- a/icing/store/key-mapper_test.cc
+++ b/icing/store/key-mapper_test.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Google LLC
+// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -14,65 +14,113 @@
#include "icing/store/key-mapper.h"
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
#include "icing/store/document-id.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
-using ::testing::_;
-using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
namespace icing {
namespace lib {
+
namespace {
-constexpr int kMaxKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
-class KeyMapperTest : public testing::Test {
+constexpr int kMaxDynamicTrieKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
+
+enum class KeyMapperType {
+ kDynamicTrie,
+ kPersistentHashMap,
+};
+
+struct KeyMapperTestParam {
+ KeyMapperType key_mapper_type;
+ bool pre_mapping_fbv;
+
+ explicit KeyMapperTestParam(KeyMapperType key_mapper_type_in,
+ bool pre_mapping_fbv_in)
+ : key_mapper_type(key_mapper_type_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+};
+
+class KeyMapperTest : public ::testing::TestWithParam<KeyMapperTestParam> {
protected:
- void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_dir_ = base_dir_ + "/key_mapper";
+ }
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
}
+ libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>>
+ CreateKeyMapper() {
+ const KeyMapperTestParam& param = GetParam();
+ switch (param.key_mapper_type) {
+ case KeyMapperType::kDynamicTrie:
+ return DynamicTrieKeyMapper<DocumentId>::Create(
+ filesystem_, working_dir_, kMaxDynamicTrieKeyMapperSize);
+ case KeyMapperType::kPersistentHashMap:
+ return PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem_, working_dir_, param.pre_mapping_fbv);
+ }
+ }
+
+ libtextclassifier3::Status DeleteKeyMapper() {
+ const KeyMapperTestParam& param = GetParam();
+ switch (param.key_mapper_type) {
+ case KeyMapperType::kDynamicTrie:
+ return DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_,
+ working_dir_);
+ case KeyMapperType::kPersistentHashMap:
+ return PersistentHashMapKeyMapper<DocumentId>::Delete(filesystem_,
+ working_dir_);
+ }
+ }
+
std::string base_dir_;
+ std::string working_dir_;
Filesystem filesystem_;
};
-TEST_F(KeyMapperTest, InvalidBaseDir) {
- ASSERT_THAT(
- KeyMapper<DocumentId>::Create(filesystem_, "/dev/null", kMaxKeyMapperSize)
- .status()
- .error_message(),
- HasSubstr("Failed to create KeyMapper"));
-}
-
-TEST_F(KeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) {
- ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
+std::unordered_map<std::string, DocumentId> GetAllKeyValuePairs(
+ const KeyMapper<DocumentId>* key_mapper) {
+ std::unordered_map<std::string, DocumentId> ret;
-TEST_F(KeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) {
- ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_,
- std::numeric_limits<int>::max()),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ std::unique_ptr<typename KeyMapper<DocumentId>::Iterator> itr =
+ key_mapper->GetIterator();
+ while (itr->Advance()) {
+ ret.emplace(itr->GetKey(), itr->GetValue());
+ }
+ return ret;
}
-TEST_F(KeyMapperTest, CreateNewKeyMapper) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+TEST_P(KeyMapperTest, CreateNewKeyMapper) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
EXPECT_THAT(key_mapper->num_keys(), 0);
}
-TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+TEST_P(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 50));
@@ -88,10 +136,9 @@ TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
EXPECT_THAT(key_mapper->num_keys(), 2);
}
-TEST_F(KeyMapperTest, GetOrPutOk) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+TEST_P(KeyMapperTest, GetOrPutOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
EXPECT_THAT(key_mapper->Get("foo"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -99,15 +146,15 @@ TEST_F(KeyMapperTest, GetOrPutOk) {
EXPECT_THAT(key_mapper->Get("foo"), IsOkAndHolds(1));
}
-TEST_F(KeyMapperTest, CanPersistToDiskRegularly) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
- // Can persist an empty KeyMapper.
+TEST_P(KeyMapperTest, CanPersistToDiskRegularly) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
+
+ // Can persist an empty key mapper.
ICING_EXPECT_OK(key_mapper->PersistToDisk());
EXPECT_THAT(key_mapper->num_keys(), 0);
- // Can persist the smallest KeyMapper.
+ // Can persist the smallest key mapper.
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
EXPECT_THAT(key_mapper->num_keys(), 1);
@@ -124,17 +171,15 @@ TEST_F(KeyMapperTest, CanPersistToDiskRegularly) {
EXPECT_THAT(key_mapper->num_keys(), 2);
}
-TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+TEST_P(KeyMapperTest, CanUseAcrossMultipleInstances) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
key_mapper.reset();
- ICING_ASSERT_OK_AND_ASSIGN(
- key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+
+ ICING_ASSERT_OK_AND_ASSIGN(key_mapper, CreateKeyMapper());
EXPECT_THAT(key_mapper->num_keys(), 1);
EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100));
@@ -146,43 +191,49 @@ TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) {
EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300));
}
-TEST_F(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
+TEST_P(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
// Can delete even if there's nothing there
- ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
+ ICING_EXPECT_OK(DeleteKeyMapper());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
- ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
+ ICING_EXPECT_OK(DeleteKeyMapper());
key_mapper.reset();
- ICING_ASSERT_OK_AND_ASSIGN(
- key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ ICING_ASSERT_OK_AND_ASSIGN(key_mapper, CreateKeyMapper());
EXPECT_THAT(key_mapper->num_keys(), 0);
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
EXPECT_THAT(key_mapper->num_keys(), 1);
}
-TEST_F(KeyMapperTest, GetValuesToKeys) {
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
- KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
- EXPECT_THAT(key_mapper->GetValuesToKeys(), IsEmpty());
+TEST_P(KeyMapperTest, Iterator) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ CreateKeyMapper());
+ EXPECT_THAT(GetAllKeyValuePairs(key_mapper.get()), IsEmpty());
ICING_EXPECT_OK(key_mapper->Put("foo", /*value=*/1));
ICING_EXPECT_OK(key_mapper->Put("bar", /*value=*/2));
- EXPECT_THAT(key_mapper->GetValuesToKeys(),
- UnorderedElementsAre(Pair(1, "foo"), Pair(2, "bar")));
+ EXPECT_THAT(GetAllKeyValuePairs(key_mapper.get()),
+ UnorderedElementsAre(Pair("foo", 1), Pair("bar", 2)));
ICING_EXPECT_OK(key_mapper->Put("baz", /*value=*/3));
EXPECT_THAT(
- key_mapper->GetValuesToKeys(),
- UnorderedElementsAre(Pair(1, "foo"), Pair(2, "bar"), Pair(3, "baz")));
+ GetAllKeyValuePairs(key_mapper.get()),
+ UnorderedElementsAre(Pair("foo", 1), Pair("bar", 2), Pair("baz", 3)));
}
+INSTANTIATE_TEST_SUITE_P(
+ KeyMapperTest, KeyMapperTest,
+ testing::Values(KeyMapperTestParam(KeyMapperType::kDynamicTrie,
+ /*pre_mapping_fbv_in=*/true),
+ KeyMapperTestParam(KeyMapperType::kPersistentHashMap,
+ /*pre_mapping_fbv_in=*/true),
+ KeyMapperTestParam(KeyMapperType::kPersistentHashMap,
+ /*pre_mapping_fbv_in=*/false)));
+
} // namespace
+
} // namespace lib
} // namespace icing
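
The hunk above converts KeyMapperTest from TEST_F to TEST_P so that one suite exercises both the DynamicTrie and PersistentHashMap implementations. Below is a minimal, self-contained sketch of that gtest value-parameterized pattern; the names BackendType and MiniTest are illustrative, not from icing, and the binary is assumed to link against gtest_main.

#include "gtest/gtest.h"

enum class BackendType { kA, kB };

// Each TEST_P body runs once per value passed to INSTANTIATE_TEST_SUITE_P.
class MiniTest : public ::testing::TestWithParam<BackendType> {};

TEST_P(MiniTest, ParamIsVisible) {
  // GetParam() returns the value this instantiation was created with.
  BackendType type = GetParam();
  EXPECT_TRUE(type == BackendType::kA || type == BackendType::kB);
}

INSTANTIATE_TEST_SUITE_P(AllBackends, MiniTest,
                         ::testing::Values(BackendType::kA, BackendType::kB));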
diff --git a/icing/store/namespace-fingerprint-identifier.cc b/icing/store/namespace-fingerprint-identifier.cc
new file mode 100644
index 0000000..3910105
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier.cc
@@ -0,0 +1,73 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/namespace-fingerprint-identifier.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/encode-util.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier>
+NamespaceFingerprintIdentifier::DecodeFromCString(
+ std::string_view encoded_cstr) {
+ if (encoded_cstr.size() < kMinEncodedLength) {
+ return absl_ports::InvalidArgumentError("Invalid length");
+ }
+
+ NamespaceId namespace_id = encode_util::DecodeIntFromCString(
+ encoded_cstr.substr(0, kEncodedNamespaceIdLength));
+ uint64_t fingerprint = encode_util::DecodeIntFromCString(
+ encoded_cstr.substr(kEncodedNamespaceIdLength));
+ return NamespaceFingerprintIdentifier(namespace_id, fingerprint);
+}
+
+NamespaceFingerprintIdentifier::NamespaceFingerprintIdentifier(
+ NamespaceId namespace_id, std::string_view target_str)
+ : namespace_id_(namespace_id),
+ fingerprint_(tc3farmhash::Fingerprint64(target_str)) {}
+
+std::string NamespaceFingerprintIdentifier::EncodeToCString() const {
+ // encoded_namespace_id_str should be 1 to 3 bytes based on the value of
+ // namespace_id.
+ std::string encoded_namespace_id_str =
+ encode_util::EncodeIntToCString(namespace_id_);
+ // Pad encoded_namespace_id_str to exactly kEncodedNamespaceIdLength bytes.
+ while (encoded_namespace_id_str.size() < kEncodedNamespaceIdLength) {
+ // A C string cannot contain 0x00 bytes, so we pad with 0x01 instead, just
+ // as encode_util::EncodeIntToCString does.
+ //
+ // This works because DecodeIntFromCString decodes a byte value of 0x01 as
+ // 0x00. When EncodeIntToCString returns an encoded namespace id shorter
+ // than kEncodedNamespaceIdLength bytes, the id has unencoded leading 0x00
+ // bytes, so we explicitly encode those bytes as 0x01 here.
+ encoded_namespace_id_str.push_back(1);
+ }
+
+ return absl_ports::StrCat(encoded_namespace_id_str,
+ encode_util::EncodeIntToCString(fingerprint_));
+}
+
+} // namespace lib
+} // namespace icing
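
EncodeToCString above pads the encoded namespace id with 0x01 bytes so that the result never contains a NUL. The sketch below shows an integer encoding consistent with the test vectors in namespace-fingerprint-identifier_test.cc: each byte carries 7 bits of the value plus one, least-significant group first, so 0 encodes as "\x01". The real encode_util implementation is assumed, not quoted, here.

#include <cstdint>
#include <iostream>
#include <string>
#include <string_view>

std::string EncodeIntToCStringSketch(uint64_t value) {
  std::string out;
  do {
    // Emit 7 bits at a time, offset by 1 so no byte is ever 0x00.
    out.push_back(static_cast<char>((value & 0x7F) + 1));
    value >>= 7;
  } while (value != 0);
  return out;
}

uint64_t DecodeIntFromCStringSketch(std::string_view encoded) {
  uint64_t value = 0;
  // Walk from the most significant group (last byte) back to the first.
  for (int i = static_cast<int>(encoded.size()) - 1; i >= 0; --i) {
    value = (value << 7) | (static_cast<uint8_t>(encoded[i]) - 1);
  }
  return value;
}

int main() {
  for (uint64_t v : {uint64_t{0}, uint64_t{1}, uint64_t{32767}}) {
    std::string encoded = EncodeIntToCStringSketch(v);
    std::cout << v << " -> " << encoded.size() << " byte(s), round-trips: "
              << (DecodeIntFromCStringSketch(encoded) == v) << "\n";
  }
  return 0;
}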
diff --git a/icing/store/namespace-fingerprint-identifier.h b/icing/store/namespace-fingerprint-identifier.h
new file mode 100644
index 0000000..d91ef94
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
+#define ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/store/namespace-id.h"
+
+namespace icing {
+namespace lib {
+
+class NamespaceFingerprintIdentifier {
+ public:
+ static constexpr int kEncodedNamespaceIdLength = 3;
+ static constexpr int kMinEncodedLength = kEncodedNamespaceIdLength + 1;
+
+ static libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier>
+ DecodeFromCString(std::string_view encoded_cstr);
+
+ explicit NamespaceFingerprintIdentifier()
+ : namespace_id_(0), fingerprint_(0) {}
+
+ explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id,
+ uint64_t fingerprint)
+ : namespace_id_(namespace_id), fingerprint_(fingerprint) {}
+
+ explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id,
+ std::string_view target_str);
+
+ std::string EncodeToCString() const;
+
+ bool operator<(const NamespaceFingerprintIdentifier& other) const {
+ if (namespace_id_ != other.namespace_id_) {
+ return namespace_id_ < other.namespace_id_;
+ }
+ return fingerprint_ < other.fingerprint_;
+ }
+
+ bool operator==(const NamespaceFingerprintIdentifier& other) const {
+ return namespace_id_ == other.namespace_id_ &&
+ fingerprint_ == other.fingerprint_;
+ }
+
+ NamespaceId namespace_id() const { return namespace_id_; }
+ uint64_t fingerprint() const { return fingerprint_; }
+
+ private:
+ NamespaceId namespace_id_;
+ uint64_t fingerprint_;
+} __attribute__((packed));
+static_assert(sizeof(NamespaceFingerprintIdentifier) == 10, "");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
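
The static_assert above expects 10 bytes because __attribute__((packed)), a GCC/Clang extension, removes the padding the compiler would otherwise insert between the int16_t and the 8-byte-aligned uint64_t. A standalone illustration:

#include <cstdint>
#include <iostream>

struct Unpacked {
  int16_t id;           // followed by 6 bytes of padding
  uint64_t fingerprint;
};

struct Packed {
  int16_t id;           // no padding: members are laid out back to back
  uint64_t fingerprint;
} __attribute__((packed));

int main() {
  std::cout << sizeof(Unpacked) << "\n";  // typically 16
  std::cout << sizeof(Packed) << "\n";    // 10 == 2 + 8
  return 0;
}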
diff --git a/icing/store/namespace-fingerprint-identifier_test.cc b/icing/store/namespace-fingerprint-identifier_test.cc
new file mode 100644
index 0000000..5f86156
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier_test.cc
@@ -0,0 +1,148 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/namespace-fingerprint-identifier.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(NamespaceFingerprintIdentifierTest, EncodeToCString) {
+ NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0,
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier1.EncodeToCString(), Eq("\x01\x01\x01\x01"));
+
+ NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0,
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier2.EncodeToCString(), Eq("\x01\x01\x01\x02"));
+
+ NamespaceFingerprintIdentifier identifier3(
+ /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier3.EncodeToCString(),
+ Eq("\x01\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+
+ NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1,
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier4.EncodeToCString(), Eq("\x02\x01\x01\x01"));
+
+ NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1,
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier5.EncodeToCString(), Eq("\x02\x01\x01\x02"));
+
+ NamespaceFingerprintIdentifier identifier6(
+ /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier6.EncodeToCString(),
+ Eq("\x02\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+
+ NamespaceFingerprintIdentifier identifier7(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier7.EncodeToCString(), Eq("\x80\x80\x02\x01"));
+
+ NamespaceFingerprintIdentifier identifier8(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier8.EncodeToCString(), Eq("\x80\x80\x02\x02"));
+
+ NamespaceFingerprintIdentifier identifier9(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier9.EncodeToCString(),
+ Eq("\x80\x80\x02\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+}
+
+TEST(NamespaceFingerprintIdentifierTest,
+ MultipleCStringConversionsAreReversible) {
+ NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0,
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier1.EncodeToCString()),
+ IsOkAndHolds(identifier1));
+
+ NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0,
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier2.EncodeToCString()),
+ IsOkAndHolds(identifier2));
+
+ NamespaceFingerprintIdentifier identifier3(
+ /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier3.EncodeToCString()),
+ IsOkAndHolds(identifier3));
+
+ NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1,
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier4.EncodeToCString()),
+ IsOkAndHolds(identifier4));
+
+ NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1,
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier5.EncodeToCString()),
+ IsOkAndHolds(identifier5));
+
+ NamespaceFingerprintIdentifier identifier6(
+ /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier6.EncodeToCString()),
+ IsOkAndHolds(identifier6));
+
+ NamespaceFingerprintIdentifier identifier7(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier7.EncodeToCString()),
+ IsOkAndHolds(identifier7));
+
+ NamespaceFingerprintIdentifier identifier8(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier8.EncodeToCString()),
+ IsOkAndHolds(identifier8));
+
+ NamespaceFingerprintIdentifier identifier9(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier9.EncodeToCString()),
+ IsOkAndHolds(identifier9));
+}
+
+TEST(NamespaceFingerprintIdentifierTest,
+ DecodeFromCStringInvalidLengthShouldReturnError) {
+ std::string invalid_str = "\x01\x01\x01";
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(invalid_str),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/namespace-id.h b/icing/store/namespace-id.h
index 4225be3..374e7a8 100644
--- a/icing/store/namespace-id.h
+++ b/icing/store/namespace-id.h
@@ -22,6 +22,7 @@ namespace lib {
// Id of unique namespace in DocumentProto. Generated in DocumentStore.
using NamespaceId = int16_t;
+inline constexpr NamespaceId kInvalidNamespaceId = -1;
} // namespace lib
} // namespace icing
diff --git a/icing/store/persistent-hash-map-key-mapper.h b/icing/store/persistent-hash-map-key-mapper.h
new file mode 100644
index 0000000..0596fe3
--- /dev/null
+++ b/icing/store/persistent-hash-map-key-mapper.h
@@ -0,0 +1,206 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
+#define ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-hash-map.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// File-backed mapping from string keys to a trivially copyable value
+// type.
+template <typename T, typename Formatter = absl_ports::DefaultFormatter>
+class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
+ public:
+ static constexpr int32_t kDefaultMaxNumEntries =
+ PersistentHashMap::Entry::kMaxNumEntries;
+ static constexpr int32_t kDefaultAverageKVByteSize =
+ PersistentHashMap::Options::kDefaultAverageKVByteSize;
+ static constexpr int32_t kDefaultMaxLoadFactorPercent =
+ PersistentHashMap::Options::kDefaultMaxLoadFactorPercent;
+
+ // Returns an initialized instance of PersistentHashMapKeyMapper that can
+ // immediately handle read/write operations.
+ // Returns any encountered IO errors.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Working directory used to save all the files required to
+ // persist PersistentHashMapKeyMapper. If this working_path was
+ // previously used to create a PersistentHashMapKeyMapper, then
+ // this existing data would be loaded. Otherwise, an empty
+ // PersistentHashMapKeyMapper would be created. See
+ // PersistentStorage for more details about the concept of
+ // working_path.
+ // pre_mapping_fbv: flag indicating whether to memory-map the maximum
+ // possible file size for the underlying FileBackedVector before growing
+ // the actual file size.
+ // max_num_entries: max # of key-value pairs. Used to compute the size of
+ // the 3 underlying storages.
+ // average_kv_byte_size: average byte size of a single key + serialized
+ // value. Used to compute the kv_storage size.
+ // max_load_factor_percent: percentage of the max loading for the hash map.
+ // load_factor_percent = 100 * num_keys / num_buckets
+ // If load_factor_percent exceeds
+ // max_load_factor_percent, then rehash will be
+ // invoked (and # of buckets will be doubled).
+ // Note that load_factor_percent exceeding 100 is
+ // considered valid.
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
+ Create(const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, int32_t max_num_entries = kDefaultMaxNumEntries,
+ int32_t average_kv_byte_size = kDefaultAverageKVByteSize,
+ int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent);
+
+ // Deletes working_path (and all the files under it recursively) associated
+ // with the PersistentHashMapKeyMapper.
+ //
+ // working_path: Working directory used to save all the files required to
+ // persist PersistentHashMapKeyMapper. Should be the same as
+ // passed into Create().
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Delete(const Filesystem& filesystem,
+ const std::string& working_path);
+
+ ~PersistentHashMapKeyMapper() override = default;
+
+ libtextclassifier3::Status Put(std::string_view key, T value) override {
+ return persistent_hash_map_->Put(key, &value);
+ }
+
+ libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
+ T next_value) override {
+ ICING_RETURN_IF_ERROR(persistent_hash_map_->GetOrPut(key, &next_value));
+ return next_value;
+ }
+
+ libtextclassifier3::StatusOr<T> Get(std::string_view key) const override {
+ T value;
+ ICING_RETURN_IF_ERROR(persistent_hash_map_->Get(key, &value));
+ return value;
+ }
+
+ bool Delete(std::string_view key) override {
+ return persistent_hash_map_->Delete(key).ok();
+ }
+
+ std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
+ const override {
+ return std::make_unique<PersistentHashMapKeyMapper<T, Formatter>::Iterator>(
+ persistent_hash_map_.get());
+ }
+
+ int32_t num_keys() const override { return persistent_hash_map_->size(); }
+
+ libtextclassifier3::Status PersistToDisk() override {
+ return persistent_hash_map_->PersistToDisk();
+ }
+
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override {
+ return persistent_hash_map_->GetDiskUsage();
+ }
+
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override {
+ return persistent_hash_map_->GetElementsSize();
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum() override {
+ return persistent_hash_map_->UpdateChecksums();
+ }
+
+ private:
+ class Iterator : public KeyMapper<T, Formatter>::Iterator {
+ public:
+ explicit Iterator(const PersistentHashMap* persistent_hash_map)
+ : itr_(persistent_hash_map->GetIterator()) {}
+
+ ~Iterator() override = default;
+
+ bool Advance() override { return itr_.Advance(); }
+
+ std::string_view GetKey() const override { return itr_.GetKey(); }
+
+ T GetValue() const override {
+ T value;
+ memcpy(&value, itr_.GetValue(), sizeof(T));
+ return value;
+ }
+
+ private:
+ PersistentHashMap::Iterator itr_;
+ };
+
+ // Use PersistentHashMapKeyMapper::Create() to instantiate.
+ explicit PersistentHashMapKeyMapper(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map)
+ : persistent_hash_map_(std::move(persistent_hash_map)) {}
+
+ std::unique_ptr<PersistentHashMap> persistent_hash_map_;
+
+ static_assert(std::is_trivially_copyable<T>::value,
+ "T must be trivially copyable");
+};
+
+template <typename T, typename Formatter>
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
+PersistentHashMapKeyMapper<T, Formatter>::Create(
+ const Filesystem& filesystem, std::string working_path,
+ bool pre_mapping_fbv, int32_t max_num_entries, int32_t average_kv_byte_size,
+ int32_t max_load_factor_percent) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem, std::move(working_path),
+ PersistentHashMap::Options(
+ /*value_type_size_in=*/sizeof(T),
+ /*max_num_entries_in=*/max_num_entries,
+ /*max_load_factor_percent_in=*/max_load_factor_percent,
+ /*average_kv_byte_size_in=*/average_kv_byte_size,
+ /*init_num_buckets_in=*/
+ PersistentHashMap::Options::kDefaultInitNumBuckets,
+ /*pre_mapping_fbv_in=*/pre_mapping_fbv)));
+ return std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>(
+ new PersistentHashMapKeyMapper<T, Formatter>(
+ std::move(persistent_hash_map)));
+}
+
+template <typename T, typename Formatter>
+/* static */ libtextclassifier3::Status
+PersistentHashMapKeyMapper<T, Formatter>::Delete(
+ const Filesystem& filesystem, const std::string& working_path) {
+ return PersistentHashMap::Discard(filesystem, working_path);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
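
A hedged usage sketch of the key mapper declared above: the working path and values are illustrative, and the calls follow the StatusOr-based signatures in this header rather than any documented example.

#include <memory>

#include "icing/file/filesystem.h"
#include "icing/store/document-id.h"
#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

libtextclassifier3::Status KeyMapperUsageSketch(const Filesystem& filesystem) {
  // Create a new mapper, or reload one previously persisted under this path.
  ICING_ASSIGN_OR_RETURN(
      std::unique_ptr<PersistentHashMapKeyMapper<DocumentId>> mapper,
      PersistentHashMapKeyMapper<DocumentId>::Create(
          filesystem, "/tmp/icing/key_mapper", /*pre_mapping_fbv=*/false));
  ICING_RETURN_IF_ERROR(mapper->Put("namespace#uri", /*value=*/42));
  ICING_ASSIGN_OR_RETURN(DocumentId id, mapper->Get("namespace#uri"));
  (void)id;  // id == 42
  // Flush the underlying PersistentHashMap so the data survives a restart.
  return mapper->PersistToDisk();
}

}  // namespace lib
}  // namespace icing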
diff --git a/icing/store/persistent-hash-map-key-mapper_test.cc b/icing/store/persistent-hash-map-key-mapper_test.cc
new file mode 100644
index 0000000..0d610e9
--- /dev/null
+++ b/icing/store/persistent-hash-map-key-mapper_test.cc
@@ -0,0 +1,52 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/persistent-hash-map-key-mapper.h"
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class PersistentHashMapKeyMapperTest : public testing::Test {
+ protected:
+ void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ std::string base_dir_;
+ Filesystem filesystem_;
+};
+
+TEST_F(PersistentHashMapKeyMapperTest, InvalidBaseDir) {
+ EXPECT_THAT(PersistentHashMapKeyMapper<DocumentId>::Create(
+ filesystem_, "/dev/null", /*pre_mapping_fbv=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/suggestion-result-checker-impl.h b/icing/store/suggestion-result-checker-impl.h
new file mode 100644
index 0000000..4e01f81
--- /dev/null
+++ b/icing/store/suggestion-result-checker-impl.h
@@ -0,0 +1,154 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+#define ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
+
+namespace icing {
+namespace lib {
+
+class SuggestionResultCheckerImpl : public SuggestionResultChecker {
+ public:
+ explicit SuggestionResultCheckerImpl(
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_set<NamespaceId> target_namespace_ids,
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map,
+ std::unordered_set<SchemaTypeId> target_schema_type_ids,
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map,
+ std::string target_section, std::unordered_set<DocumentId> search_base,
+ int64_t current_time_ms)
+ : document_store_(*document_store),
+ schema_store_(*schema_store),
+ target_namespace_ids_(std::move(target_namespace_ids)),
+ document_id_filter_map_(std::move(document_id_filter_map)),
+ target_schema_type_ids_(std::move(target_schema_type_ids)),
+ property_filter_map_(std::move(property_filter_map)),
+ target_section_(std::move(target_section)),
+ search_base_(std::move(search_base)),
+ current_time_ms_(current_time_ms) {}
+
+ bool MatchesTargetNamespace(NamespaceId namespace_id) const {
+ return target_namespace_ids_.empty() ||
+ target_namespace_ids_.find(namespace_id) !=
+ target_namespace_ids_.end();
+ }
+
+ bool MatchesTargetDocumentIds(NamespaceId namespace_id,
+ DocumentId document_id) const {
+ if (document_id_filter_map_.empty()) {
+ return true;
+ }
+ auto document_ids_itr = document_id_filter_map_.find(namespace_id);
+ // Return true if the client didn't restrict document ids in this
+ // namespace, or if this document is one of the requested ids.
+ return document_ids_itr == document_id_filter_map_.end() ||
+ document_ids_itr->second.find(document_id) !=
+ document_ids_itr->second.end();
+ }
+
+ bool MatchesTargetSchemaType(SchemaTypeId schema_type_id) const {
+ return target_schema_type_ids_.empty() ||
+ target_schema_type_ids_.find(schema_type_id) !=
+ target_schema_type_ids_.end();
+ }
+
+ bool MatchesTargetSection(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ if (target_section_.empty()) {
+ return true;
+ }
+ auto section_metadata_or =
+ schema_store_.GetSectionMetadata(schema_type_id, section_id);
+ if (!section_metadata_or.ok()) {
+ // cannot find the target section metadata.
+ return false;
+ }
+ const SectionMetadata* section_metadata = section_metadata_or.ValueOrDie();
+ return section_metadata->path == target_section_;
+ }
+
+ bool MatchesSearchBase(DocumentId document_id) const {
+ return search_base_.empty() ||
+ search_base_.find(document_id) != search_base_.end();
+ }
+
+ bool MatchesPropertyFilter(SchemaTypeId schema_type_id,
+ SectionId section_id) const {
+ if (property_filter_map_.empty()) {
+ return true;
+ }
+ auto section_mask_itr = property_filter_map_.find(schema_type_id);
+ return section_mask_itr == property_filter_map_.end() ||
+ (section_mask_itr->second & (UINT64_C(1) << section_id)) != 0;
+ }
+
+ bool BelongsToTargetResults(DocumentId document_id,
+ SectionId section_id) const override {
+ // Get the document filter data first.
+ auto document_filter_data_optional =
+ document_store_.GetAliveDocumentFilterData(document_id,
+ current_time_ms_);
+ if (!document_filter_data_optional) {
+ // The document doesn't exist.
+ return false;
+ }
+ DocumentFilterData document_filter_data =
+ document_filter_data_optional.value();
+
+ if (!MatchesTargetNamespace(document_filter_data.namespace_id())) {
+ return false;
+ }
+ if (!MatchesTargetDocumentIds(document_filter_data.namespace_id(),
+ document_id)) {
+ return false;
+ }
+ if (!MatchesTargetSchemaType(document_filter_data.schema_type_id())) {
+ return false;
+ }
+ if (!MatchesTargetSection(document_filter_data.schema_type_id(),
+ section_id)) {
+ return false;
+ }
+ if (!MatchesSearchBase(document_id)) {
+ return false;
+ }
+ if (!MatchesPropertyFilter(document_filter_data.schema_type_id(),
+ section_id)) {
+ return false;
+ }
+ return true;
+ }
+
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+ std::unordered_set<NamespaceId> target_namespace_ids_;
+ std::unordered_map<NamespaceId, std::unordered_set<DocumentId>>
+ document_id_filter_map_;
+ std::unordered_set<SchemaTypeId> target_schema_type_ids_;
+ std::unordered_map<SchemaTypeId, SectionIdMask> property_filter_map_;
+ std::string target_section_;
+ std::unordered_set<DocumentId> search_base_;
+ int64_t current_time_ms_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_SUGGESTION_RESULT_CHECKER_IMPL_H_
\ No newline at end of file
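
MatchesPropertyFilter above reduces a per-schema-type property filter to a single 64-bit SectionIdMask in which a set bit N means section id N passes. A standalone illustration of the bit test, using a stand-in alias rather than icing's real SectionIdMask:

#include <cstdint>
#include <iostream>

using SectionIdMaskSketch = uint64_t;  // stand-in for icing's SectionIdMask

bool SectionAllowed(SectionIdMaskSketch mask, int section_id) {
  // Same expression as in MatchesPropertyFilter: test bit `section_id`.
  return (mask & (UINT64_C(1) << section_id)) != 0;
}

int main() {
  // Allow only sections 0 and 5.
  SectionIdMaskSketch mask = (UINT64_C(1) << 0) | (UINT64_C(1) << 5);
  std::cout << SectionAllowed(mask, 0) << SectionAllowed(mask, 1)
            << SectionAllowed(mask, 5) << "\n";  // prints 101
  return 0;
}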
diff --git a/icing/store/suggestion-result-checker.h b/icing/store/suggestion-result-checker.h
new file mode 100644
index 0000000..8fadd3e
--- /dev/null
+++ b/icing/store/suggestion-result-checker.h
@@ -0,0 +1,44 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_SUGGESTION_RESULT_CHECKER_H_
+#define ICING_STORE_SUGGESTION_RESULT_CHECKER_H_
+
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+class SuggestionResultChecker {
+ public:
+ virtual ~SuggestionResultChecker() = default;
+
+ // Check whether the given document id belongs to the target namespaces.
+ // Returns:
+ //   - true: the given document id belongs to the target results
+ //   - false: the given document id doesn't belong to the target results,
+ //     e.g. it fails a filter, its id is out of the previously seen range,
+ //     or the document or its filter data is not found
+ virtual bool BelongsToTargetResults(DocumentId document_id,
+ SectionId section_id) const = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_SUGGESTION_RESULT_CHECKER_H_
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
new file mode 100644
index 0000000..546067d
--- /dev/null
+++ b/icing/store/usage-store.cc
@@ -0,0 +1,262 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/usage-store.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+std::string MakeUsageScoreCacheFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/usage-scores");
+}
+} // namespace
+
+libtextclassifier3::StatusOr<std::unique_ptr<UsageStore>> UsageStore::Create(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+
+ if (!filesystem->CreateDirectoryRecursively(base_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create UsageStore directory: ", base_dir));
+ }
+
+ const std::string score_cache_filename =
+ MakeUsageScoreCacheFilename(base_dir);
+
+ auto usage_score_cache_or = FileBackedVector<UsageScores>::Create(
+ *filesystem, score_cache_filename,
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+
+ if (absl_ports::IsFailedPrecondition(usage_score_cache_or.status())) {
+ // File checksum doesn't match the stored checksum. Delete and recreate the
+ // file.
+ ICING_RETURN_IF_ERROR(
+ FileBackedVector<UsageScores>::Delete(*filesystem, score_cache_filename));
+
+ ICING_VLOG(1) << "The score cache file in UsageStore is corrupted, all "
+ "scores have been reset.";
+
+ usage_score_cache_or = FileBackedVector<UsageScores>::Create(
+ *filesystem, score_cache_filename,
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ }
+
+ if (!usage_score_cache_or.ok()) {
+ ICING_LOG(ERROR) << usage_score_cache_or.status().error_message()
+ << "Failed to initialize usage_score_cache";
+ return usage_score_cache_or.status();
+ }
+
+ return std::unique_ptr<UsageStore>(new UsageStore(
+ std::move(usage_score_cache_or).ValueOrDie(), *filesystem, base_dir));
+}
+
+libtextclassifier3::Status UsageStore::AddUsageReport(const UsageReport& report,
+ DocumentId document_id) {
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id %d is invalid.", document_id));
+ }
+
+ // We don't need a copy here because we'll set the value at the same index.
+ // This won't unintentionally grow the underlying file since we already have
+ // enough space for the current index.
+ auto usage_scores_or = usage_score_cache_->Get(document_id);
+
+ // OutOfRange means that the score cache hasn't seen this document id
+ // before; it's not an error here.
+ UsageScores usage_scores;
+ if (usage_scores_or.ok()) {
+ usage_scores = *std::move(usage_scores_or).ValueOrDie();
+ } else if (!absl_ports::IsOutOfRange(usage_scores_or.status())) {
+ // Real error
+ return usage_scores_or.status();
+ }
+
+ // Update last used timestamps and type counts. The counts won't be
+ // incremented if they are already the maximum values. The timestamp from
+ // UsageReport is in milliseconds, we need to convert it to seconds.
+ int64_t report_timestamp_s = report.usage_timestamp_ms() / 1000;
+
+ switch (report.usage_type()) {
+ case UsageReport::USAGE_TYPE1:
+ if (report_timestamp_s > std::numeric_limits<uint32_t>::max()) {
+ usage_scores.usage_type1_last_used_timestamp_s =
+ std::numeric_limits<uint32_t>::max();
+ } else if (report_timestamp_s >
+ usage_scores.usage_type1_last_used_timestamp_s) {
+ usage_scores.usage_type1_last_used_timestamp_s = report_timestamp_s;
+ }
+
+ if (usage_scores.usage_type1_count < std::numeric_limits<int>::max()) {
+ ++usage_scores.usage_type1_count;
+ }
+ break;
+ case UsageReport::USAGE_TYPE2:
+ if (report_timestamp_s > std::numeric_limits<uint32_t>::max()) {
+ usage_scores.usage_type2_last_used_timestamp_s =
+ std::numeric_limits<uint32_t>::max();
+ } else if (report_timestamp_s >
+ usage_scores.usage_type2_last_used_timestamp_s) {
+ usage_scores.usage_type2_last_used_timestamp_s = report_timestamp_s;
+ }
+
+ if (usage_scores.usage_type2_count < std::numeric_limits<int>::max()) {
+ ++usage_scores.usage_type2_count;
+ }
+ break;
+ case UsageReport::USAGE_TYPE3:
+ if (report_timestamp_s > std::numeric_limits<uint32_t>::max()) {
+ usage_scores.usage_type3_last_used_timestamp_s =
+ std::numeric_limits<uint32_t>::max();
+ } else if (report_timestamp_s >
+ usage_scores.usage_type3_last_used_timestamp_s) {
+ usage_scores.usage_type3_last_used_timestamp_s = report_timestamp_s;
+ }
+
+ if (usage_scores.usage_type3_count < std::numeric_limits<int>::max()) {
+ ++usage_scores.usage_type3_count;
+ }
+ }
+
+ // Write updated usage scores to file.
+ return usage_score_cache_->Set(document_id, usage_scores);
+}
+
+libtextclassifier3::Status UsageStore::DeleteUsageScores(
+ DocumentId document_id) {
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id %d is invalid.", document_id));
+ }
+
+ if (document_id >= usage_score_cache_->num_elements()) {
+ // Nothing to delete.
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Clear all the scores of the document.
+ return usage_score_cache_->Set(document_id, UsageScores());
+}
+
+libtextclassifier3::StatusOr<UsageStore::UsageScores>
+UsageStore::GetUsageScores(DocumentId document_id) {
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id %d is invalid.", document_id));
+ }
+
+ auto usage_scores_or = usage_score_cache_->GetCopy(document_id);
+ if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
+ // No usage scores found. Return the default scores.
+ return UsageScores();
+ } else if (!usage_scores_or.ok()) {
+ // Pass up any other errors.
+ return usage_scores_or.status();
+ }
+
+ return std::move(usage_scores_or).ValueOrDie();
+}
+
+libtextclassifier3::Status UsageStore::SetUsageScores(
+ DocumentId document_id, const UsageScores& usage_scores) {
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id %d is invalid.", document_id));
+ }
+
+ return usage_score_cache_->Set(document_id, usage_scores);
+}
+
+libtextclassifier3::Status UsageStore::CloneUsageScores(
+ DocumentId from_document_id, DocumentId to_document_id) {
+ if (!IsDocumentIdValid(from_document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "from_document_id %d is invalid.", from_document_id));
+ }
+
+ if (!IsDocumentIdValid(to_document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "to_document_id %d is invalid.", to_document_id));
+ }
+
+ auto usage_scores_or = usage_score_cache_->GetCopy(from_document_id);
+ if (usage_scores_or.ok()) {
+ return usage_score_cache_->Set(to_document_id,
+ std::move(usage_scores_or).ValueOrDie());
+ } else if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
+ // No usage scores found. Set default scores to to_document_id.
+ return usage_score_cache_->Set(to_document_id, UsageScores());
+ }
+
+ // Real error
+ return usage_scores_or.status();
+}
+
+libtextclassifier3::Status UsageStore::PersistToDisk() {
+ return usage_score_cache_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> UsageStore::ComputeChecksum() {
+ return usage_score_cache_->ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<int64_t> UsageStore::GetElementsFileSize() const {
+ return usage_score_cache_->GetElementsFileSize();
+}
+
+libtextclassifier3::StatusOr<int64_t> UsageStore::GetDiskUsage() const {
+ return usage_score_cache_->GetDiskUsage();
+}
+
+libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
+ if (num_documents >= usage_score_cache_->num_elements()) {
+ // No need to truncate
+ return libtextclassifier3::Status::OK;
+ }
+ // Keep only the elements for document ids in [0, num_documents).
+ return usage_score_cache_->TruncateTo(num_documents);
+}
+
+libtextclassifier3::Status UsageStore::Reset() {
+ // We delete all the scores by deleting the whole file.
+ libtextclassifier3::Status status = FileBackedVector<UsageScores>::Delete(
+ filesystem_, MakeUsageScoreCacheFilename(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete usage_score_cache";
+ return status;
+ }
+
+ // Create a new usage_score_cache
+ auto usage_score_cache_or = FileBackedVector<UsageScores>::Create(
+ filesystem_, MakeUsageScoreCacheFilename(base_dir_),
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ if (!usage_score_cache_or.ok()) {
+ ICING_LOG(ERROR) << usage_score_cache_or.status().error_message()
+ << "Failed to re-create usage_score_cache";
+ return usage_score_cache_or.status();
+ }
+ usage_score_cache_ = std::move(usage_score_cache_or).ValueOrDie();
+
+ return PersistToDisk();
+}
+
+} // namespace lib
+} // namespace icing
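
The three USAGE_TYPE branches in AddUsageReport above repeat a single update rule. The hypothetical helper below (not part of the source) captures it: keep the newest timestamp, clamped to the uint32_t range, and saturate the count at INT_MAX instead of overflowing.

#include <cstdint>
#include <limits>

void UpdateUsageScoreSketch(int64_t report_timestamp_s, uint32_t* last_used_s,
                            int* count) {
  if (report_timestamp_s > std::numeric_limits<uint32_t>::max()) {
    // Clamp timestamps that don't fit in 32 bits.
    *last_used_s = std::numeric_limits<uint32_t>::max();
  } else if (report_timestamp_s > *last_used_s) {
    // Only move the "last used" time forward.
    *last_used_s = static_cast<uint32_t>(report_timestamp_s);
  }
  if (*count < std::numeric_limits<int>::max()) {
    ++*count;  // saturating increment: stop at INT_MAX
  }
}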
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
new file mode 100644
index 0000000..3c7a55e
--- /dev/null
+++ b/icing/store/usage-store.h
@@ -0,0 +1,205 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_USAGE_STORE_H_
+#define ICING_STORE_USAGE_STORE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "icing/file/file-backed-vector.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// A storage class that maintains scores that are calculated based on usage
+// reports.
+class UsageStore {
+ public:
+ // Factory function to create a UsageStore instance. The base directory is
+ // used to persist usage scores. If a usage store was previously created with
+ // this directory, it will reload the files saved by the last instance.
+ //
+ // TODO(b/169594617): consider returning StatusOr<UsageStore>
+ //
+ // Returns:
+ // A UsageStore on success
+ // FAILED_PRECONDITION on any null pointer input
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<std::unique_ptr<UsageStore>> Create(
+ const Filesystem* filesystem, const std::string& base_dir);
+
+ // The scores here reflect the timestamps and usage types defined in
+ // usage.proto.
+ struct UsageScores {
+ // The latest timestamp in seconds reported with custom usage type 1.
+ uint32_t usage_type1_last_used_timestamp_s = 0;
+
+ // The latest timestamp in seconds reported with custom usage type 2.
+ uint32_t usage_type2_last_used_timestamp_s = 0;
+
+ // The latest timestamp in seconds reported with custom usage type 3.
+ uint32_t usage_type3_last_used_timestamp_s = 0;
+
+ // Count of reports with custom usage type 1
+ int usage_type1_count = 0;
+
+ // Count of reports with custom usage type 2
+ int usage_type2_count = 0;
+
+ // Count of reports with custom usage type 3
+ int usage_type3_count = 0;
+
+ bool operator==(const UsageScores& other) const {
+ return usage_type1_last_used_timestamp_s ==
+ other.usage_type1_last_used_timestamp_s &&
+ usage_type2_last_used_timestamp_s ==
+ other.usage_type2_last_used_timestamp_s &&
+ usage_type3_last_used_timestamp_s ==
+ other.usage_type3_last_used_timestamp_s &&
+ usage_type1_count == other.usage_type1_count &&
+ usage_type2_count == other.usage_type2_count &&
+ usage_type3_count == other.usage_type3_count;
+ }
+ };
+
+ // Adds one usage report. The corresponding usage scores of the specified
+ // document will be updated.
+ //
+ // Note: changes are written to disk automatically, callers can also call
+ // PersistToDisk() to flush changes immediately.
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT if document_id is invalid
+ // INTERNAL_ERROR on I/O errors.
+ libtextclassifier3::Status AddUsageReport(const UsageReport& report,
+ DocumentId document_id);
+
+ // Deletes the usage scores of a document.
+ //
+ // Note: changes are written to disk automatically, callers can also call
+ // PersistToDisk() to flush changes immediately.
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT if document_id is invalid
+ // INTERNAL_ERROR on I/O errors
+ libtextclassifier3::Status DeleteUsageScores(DocumentId document_id);
+
+ // Gets the usage scores of a document.
+ //
+ // Returns:
+ // UsageScores on success
+ // INVALID_ARGUMENT if document_id is invalid
+ // INTERNAL_ERROR on I/O errors
+ //
+ // TODO(b/169433395): return a pointer instead of an object.
+ libtextclassifier3::StatusOr<UsageScores> GetUsageScores(
+ DocumentId document_id);
+
+ // Sets the usage scores of a document.
+ //
+ // Note: changes are written to disk automatically, callers can also call
+ // PersistToDisk() to flush changes immediately.
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT if document_id is invalid
+ // INTERNAL_ERROR on I/O errors
+ libtextclassifier3::Status SetUsageScores(DocumentId document_id,
+ const UsageScores& usage_scores);
+
+ // Clones the usage scores from one document to another.
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT if any of the document ids is invalid
+ // INTERNAL_ERROR on I/O errors
+ //
+ // TODO(b/169433395): We can remove this method once GetUsageScores() returns
+ // a pointer.
+ libtextclassifier3::Status CloneUsageScores(DocumentId from_document_id,
+ DocumentId to_document_id);
+
+ // Syncs data to disk.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL on I/O error
+ libtextclassifier3::Status PersistToDisk();
+
+ // Updates checksum of the usage scores and returns it.
+ //
+ // Returns:
+ // A Crc32 on success
+ // INTERNAL_ERROR if the internal state is inconsistent
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ // Returns the file size of all the elements held in the UsageStore. File
+ // size is in bytes. This excludes the size of any internal metadata, e.g. any
+ // internal headers.
+ //
+ // Returns:
+ // File size on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+
+ // Calculates and returns the disk usage in bytes. Rounds up to the nearest
+ // block size.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+ // Resizes the storage so that only the usage scores of the first
+ // num_documents documents are stored.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if num_documents is negative
+ libtextclassifier3::Status TruncateTo(DocumentId num_documents);
+
+ // Deletes all usage data and re-initializes the storage.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Reset();
+
+ int32_t num_elements() const { return usage_score_cache_->num_elements(); }
+
+ private:
+ explicit UsageStore(std::unique_ptr<FileBackedVector<UsageScores>>
+ document_id_to_scores_mapper,
+ const Filesystem& filesystem, std::string base_dir)
+ : filesystem_(filesystem),
+ base_dir_(std::move(base_dir)),
+ usage_score_cache_(std::move(document_id_to_scores_mapper)) {}
+
+ const Filesystem& filesystem_;
+
+ // Base directory where the files are located.
+ const std::string base_dir_;
+
+ // Used to store the usage scores of documents.
+ std::unique_ptr<FileBackedVector<UsageScores>> usage_score_cache_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_USAGE_STORE_H_
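
A hedged end-to-end sketch of the UsageStore API declared above; the directory is illustrative and the flow mirrors the factory, report, and query methods in this header.

#include <memory>

#include "icing/file/filesystem.h"
#include "icing/proto/usage.pb.h"
#include "icing/store/usage-store.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

libtextclassifier3::Status UsageStoreSketch(const Filesystem* filesystem) {
  ICING_ASSIGN_OR_RETURN(
      std::unique_ptr<UsageStore> store,
      UsageStore::Create(filesystem, "/tmp/icing/usage_store"));

  UsageReport report;
  report.set_usage_timestamp_ms(5000);  // stored as 5 seconds
  report.set_usage_type(UsageReport::USAGE_TYPE1);
  ICING_RETURN_IF_ERROR(store->AddUsageReport(report, /*document_id=*/0));

  ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores scores,
                         store->GetUsageScores(/*document_id=*/0));
  (void)scores;  // usage_type1_count == 1, last used timestamp == 5s
  return store->PersistToDisk();
}

}  // namespace lib
}  // namespace icing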
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
new file mode 100644
index 0000000..07fe2c5
--- /dev/null
+++ b/icing/store/usage-store_test.cc
@@ -0,0 +1,628 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/usage-store.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::Not;
+
+class UsageStoreTest : public testing::Test {
+ protected:
+ UsageStoreTest() : test_dir_(GetTestTempDir() + "/usage-store-test") {}
+
+ void SetUp() override {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+};
+
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64_t timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
+UsageStore::UsageScores CreateUsageScores(uint32_t type1_timestamp,
+ uint32_t type2_timestamp,
+ uint32_t type3_timestamp,
+ int type1_count, int type2_count,
+ int type3_count) {
+ UsageStore::UsageScores scores;
+ scores.usage_type1_last_used_timestamp_s = type1_timestamp;
+ scores.usage_type2_last_used_timestamp_s = type2_timestamp;
+ scores.usage_type3_last_used_timestamp_s = type3_timestamp;
+ scores.usage_type1_count = type1_count;
+ scores.usage_type2_count = type2_count;
+ scores.usage_type3_count = type3_count;
+
+ return scores;
+}
+
+TEST_F(UsageStoreTest, CreationShouldSucceed) {
+ EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_), IsOk());
+}
+
+TEST_F(UsageStoreTest, CreationShouldFailOnNullPointer) {
+ EXPECT_THAT(UsageStore::Create(nullptr, test_dir_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(UsageStoreTest, UsageScoresShouldBeComparable) {
+ UsageStore::UsageScores scores1;
+ UsageStore::UsageScores scores2;
+ EXPECT_THAT(scores1, Eq(scores2));
+
+ // operator== should compare usage_type1_last_used_timestamp_s.
+ ++scores1.usage_type1_last_used_timestamp_s;
+ EXPECT_THAT(scores1, Not(Eq(scores2)));
+ ++scores2.usage_type1_last_used_timestamp_s;
+ EXPECT_THAT(scores1, Eq(scores2));
+
+ // operator== should compare usage_type2_last_used_timestamp_s.
+ ++scores1.usage_type2_last_used_timestamp_s;
+ EXPECT_THAT(scores1, Not(Eq(scores2)));
+ ++scores2.usage_type2_last_used_timestamp_s;
+ EXPECT_THAT(scores1, Eq(scores2));
+
+ // operator== should compare usage_type3_last_used_timestamp_s.
+ ++scores1.usage_type3_last_used_timestamp_s;
+ EXPECT_THAT(scores1, Not(Eq(scores2)));
+ ++scores2.usage_type3_last_used_timestamp_s;
+ EXPECT_THAT(scores1, Eq(scores2));
+
+ // operator== should compare usage_type1_count.
+ ++scores1.usage_type1_count;
+ EXPECT_THAT(scores1, Not(Eq(scores2)));
+ ++scores2.usage_type1_count;
+ EXPECT_THAT(scores1, Eq(scores2));
+
+ // operator== should compare usage_type2_count.
+ ++scores1.usage_type2_count;
+ EXPECT_THAT(scores1, Not(Eq(scores2)));
+ ++scores2.usage_type2_count;
+ EXPECT_THAT(scores1, Eq(scores2));
+
+ // operator== should compare usage_type3_count.
+ ++scores1.usage_type3_count;
+ EXPECT_THAT(scores1, Not(Eq(scores2)));
+ ++scores2.usage_type3_count;
+ EXPECT_THAT(scores1, Eq(scores2));
+}
+
+TEST_F(UsageStoreTest, InvalidDocumentIdShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ DocumentId invalid_document_id = -1;
+
+ EXPECT_THAT(usage_store->AddUsageReport(UsageReport(), invalid_document_id),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(usage_store->DeleteUsageScores(invalid_document_id),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(usage_store->GetUsageScores(invalid_document_id),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(usage_store->SetUsageScores(invalid_document_id,
+ UsageStore::UsageScores()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) {
+ // Create 3 reports with different timestamps.
+ UsageReport usage_report_time1 = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_time5 = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/5000, UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_time10 = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/10000, UsageReport::USAGE_TYPE1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Report a usage with timestamp 5.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1));
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+
+ // Report a usage with timestamp 1. The timestamp won't be updated.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1));
+ ++expected_scores.usage_type1_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+
+ // Report a usage with timestamp 10. The timestamp should be updated.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1));
+ expected_scores.usage_type1_last_used_timestamp_s = 10;
+ ++expected_scores.usage_type1_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) {
+ // Create 3 reports with different usage types.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_type2 = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE2);
+ UsageReport usage_report_type3 = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE3);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Report a usage with type 1.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1));
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+ // Report another usage with type 1.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1));
+ ++expected_scores.usage_type1_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+
+ // Report a usage with type 2.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1));
+ ++expected_scores.usage_type2_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+ // Report another usage with type 2.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1));
+ ++expected_scores.usage_type2_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+
+ // Report a usage with type 3.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1));
+ ++expected_scores.usage_type3_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+ // Report another usage with type 3.
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1));
+ ++expected_scores.usage_type3_count;
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(UsageStoreTest, GetNonExistingDocumentShouldReturnDefaultScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, SetAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+
+ // Verify that set and get results are consistent.
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+}
+
+TEST_F(UsageStoreTest, ImplicitlyInitializedScoresShouldBeZero) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Explicitly set scores for document 2.
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2,
+ UsageStore::UsageScores()));
+
+ // Now the scores of document 1 have been implicitly initialized. The scores
+ // should all be 0.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, DeleteUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+
+ // Delete the usage scores of document 1; afterwards, all of its scores
+ // should be 0.
+ ICING_EXPECT_OK(usage_store->DeleteUsageScores(/*document_id=*/1));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, CloneUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and assign them to
+ // document 1.
+ UsageStore::UsageScores scores_a = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores_a));
+
+ // Create another set of usage scores with some random numbers and assign them
+ // to document 2.
+ UsageStore::UsageScores scores_b = CreateUsageScores(
+ /*type1_timestamp=*/111, /*type2_timestamp=*/666, /*type3_timestamp=*/333,
+ /*type1_count=*/50, /*type2_count=*/30, /*type3_count=*/100);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores_b));
+
+ // Clone scores from document 1 to document 3.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/1,
+ /*to_document_id=*/3),
+ IsOk());
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(scores_a));
+
+ // Clone scores from document 2 to document 3.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/2,
+ /*to_document_id=*/3),
+ IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(scores_b));
+
+ // Clone scores from document 4 to document 3. Since document 4 has no
+ // scores, document 3's scores should be reset to the default.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/4,
+ /*to_document_id=*/3),
+ IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, PersistToDisk) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+
+ EXPECT_THAT(usage_store->PersistToDisk(), IsOk());
+}
+
+TEST_F(UsageStoreTest, ComputeChecksum) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum1, usage_store->ComputeChecksum());
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum2, usage_store->ComputeChecksum());
+
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum3, usage_store->ComputeChecksum());
+
+ EXPECT_THAT(checksum1, Not(Eq(checksum2)));
+ EXPECT_THAT(checksum1, Not(Eq(checksum3)));
+ EXPECT_THAT(checksum2, Not(Eq(checksum3)));
+
+ // Without changing the store, checksum should be the same.
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum4, usage_store->ComputeChecksum());
+ EXPECT_THAT(checksum3, Eq(checksum4));
+}
+
+TEST_F(UsageStoreTest, TruncateTo) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and set scores for
+ // documents 0, 1, and 2.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+
+ // Truncate the store to 2 documents; the scores of document 2 should be
+ // gone.
+ EXPECT_THAT(usage_store->TruncateTo(/*num_documents=*/2), IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, TruncateToALargeNumberShouldDoNothing) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and set scores for
+ // documents 0 and 1.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+
+ // Truncate to a number that is greater than the number of documents. Scores
+ // should be the same.
+ EXPECT_THAT(usage_store->TruncateTo(1000), IsOk());
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, TruncateToNegativeNumberShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ EXPECT_THAT(usage_store->TruncateTo(-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(UsageStoreTest, Reset) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+
+ // Set scores for document 1 and document 2.
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+
+ EXPECT_THAT(usage_store->Reset(), IsOk());
+
+ // After resetting, all the scores are cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(UsageStore::UsageScores()));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+ }
+
+ // New instances should be created successfully after Reset().
+ EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_).status(), IsOk());
+}
+
+TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
+ // Create a report with the max value of timestamps.
+ UsageReport usage_report = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/std::numeric_limits<int64_t>::max(),
+ UsageReport::USAGE_TYPE1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // The stored timestamp in seconds should be clamped to the max value of
+ // uint32.
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/std::numeric_limits<uint32_t>::max(),
+ /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(UsageStoreTest, CountsShouldNotOverflow) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with the max value of int.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/std::numeric_limits<int>::max(), /*type2_count=*/0,
+ /*type3_count=*/0);
+
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+
+ // Report another usage with type 1.
+ UsageReport usage_report = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
+
+ // usage_type1_count should not change because it's already the max value.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+}
+
+TEST_F(UsageStoreTest, StoreShouldBeResetOnVectorChecksumMismatch) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ }
+
+ // Modify the header to trigger a vector checksum mismatch.
+ const std::string score_cache_file_path =
+ absl_ports::StrCat(test_dir_, "/usage-scores");
+ FileBackedVector<UsageStore::UsageScores>::Header header{};
+ filesystem_.PRead(
+ score_cache_file_path.c_str(), /*buf=*/&header,
+ /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header),
+ /*offset=*/0);
+ header.vector_checksum = 10; // Arbitrary garbage checksum
+ header.header_checksum = header.CalculateHeaderChecksum();
+ filesystem_.PWrite(
+ score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header,
+ /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header));
+
+ // Recover from checksum mismatch.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ // Previous data should be cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, StoreShouldBeResetOnHeaderChecksumMismatch) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ }
+
+ // Modify the header to trigger a header checksum mismatch.
+ const std::string score_cache_file_path =
+ absl_ports::StrCat(test_dir_, "/usage-scores");
+ FileBackedVector<UsageStore::UsageScores>::Header header{};
+ filesystem_.PRead(
+ score_cache_file_path.c_str(), /*buf=*/&header,
+ /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header),
+ /*offset=*/0);
+ header.header_checksum = 10; // Arbitrary garbage checksum
+ filesystem_.PWrite(
+ score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header,
+ /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header));
+
+ // Recover from checksum mismatch.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ // Previous data should be cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, GetElementsFileSize) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_file_size,
+ usage_store->GetElementsFileSize());
+ EXPECT_THAT(empty_file_size, Eq(0));
+
+ UsageReport usage_report = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
+
+ EXPECT_THAT(usage_store->GetElementsFileSize(),
+ IsOkAndHolds(Gt(empty_file_size)));
+}
+
+TEST_F(UsageStoreTest, GetDiskUsageEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+ EXPECT_THAT(empty_disk_usage, Gt(0));
+}
+
+TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+
+ // Since GetDiskUsage can only report sizes in increments of block_size, we
+ // need to insert enough usage reports that the disk usage grows by at least
+ // one block. The number 200 is somewhat arbitrary; it was found through
+ // manual testing.
+ UsageReport usage_report = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ for (int i = 0; i < 200; ++i) {
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report, /*document_id=*/i));
+ }
+
+ // We need to persist since iOS won't see the new disk allocations until after
+ // everything gets written.
+ ICING_ASSERT_OK(usage_store->PersistToDisk());
+
+ EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/always-false-suggestion-result-checker-impl.h b/icing/testing/always-false-suggestion-result-checker-impl.h
new file mode 100644
index 0000000..2f956de
--- /dev/null
+++ b/icing/testing/always-false-suggestion-result-checker-impl.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_ALWAYS_FALSE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+#define ICING_TESTING_ALWAYS_FALSE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker.h"
+
+namespace icing {
+namespace lib {
+
+class AlwaysFalseSuggestionResultCheckerImpl : public SuggestionResultChecker {
+ public:
+ bool BelongsToTargetResults(DocumentId document_id,
+ SectionId section_id) const override {
+ return false;
+ }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_ALWAYS_FALSE_SUGGESTION_RESULT_CHECKER_IMPL_H_
\ No newline at end of file
diff --git a/icing/testing/always-true-suggestion-result-checker-impl.h b/icing/testing/always-true-suggestion-result-checker-impl.h
new file mode 100644
index 0000000..d25c39c
--- /dev/null
+++ b/icing/testing/always-true-suggestion-result-checker-impl.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_ALWAYS_TRUE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+#define ICING_TESTING_ALWAYS_TRUE_SUGGESTION_RESULT_CHECKER_IMPL_H_
+
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/suggestion-result-checker.h"
+
+namespace icing {
+namespace lib {
+
+class AlwaysTrueSuggestionResultCheckerImpl : public SuggestionResultChecker {
+ public:
+ bool BelongsToTargetResults(DocumentId document_id,
+ SectionId section_id) const override {
+ return true;
+ }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_ALWAYS_TRUE_SUGGESTION_RESULT_CHECKER_IMPL_H_
\ No newline at end of file
diff --git a/icing/testing/common-matchers.cc b/icing/testing/common-matchers.cc
new file mode 100644
index 0000000..cd4e446
--- /dev/null
+++ b/icing/testing/common-matchers.cc
@@ -0,0 +1,124 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+ExtractTermFrequenciesResult ExtractTermFrequencies(
+ const std::unordered_map<SectionId, Hit::TermFrequency>&
+ section_ids_tf_map) {
+ ExtractTermFrequenciesResult result;
+ for (const auto& [section_id, tf] : section_ids_tf_map) {
+ result.term_frequencies[section_id] = tf;
+ result.section_mask |= UINT64_C(1) << section_id;
+ }
+ return result;
+}
+
+CheckTermFrequencyResult CheckTermFrequency(
+ const std::array<Hit::TermFrequency, kTotalNumSections>&
+ expected_term_frequencies,
+ const std::array<Hit::TermFrequency, kTotalNumSections>&
+ actual_term_frequencies) {
+ CheckTermFrequencyResult result;
+ for (SectionId section_id = 0; section_id < kTotalNumSections; ++section_id) {
+ if (expected_term_frequencies.at(section_id) !=
+ actual_term_frequencies.at(section_id)) {
+ result.term_frequencies_match = false;
+ }
+ }
+ result.actual_term_frequencies_str =
+ absl_ports::StrCat("[",
+ absl_ports::StrJoin(actual_term_frequencies, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+ result.expected_term_frequencies_str =
+ absl_ports::StrCat("[",
+ absl_ports::StrJoin(expected_term_frequencies, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+ return result;
+}
+
+std::string StatusCodeToString(libtextclassifier3::StatusCode code) {
+ switch (code) {
+ case libtextclassifier3::StatusCode::OK:
+ return "OK";
+ case libtextclassifier3::StatusCode::CANCELLED:
+ return "CANCELLED";
+ case libtextclassifier3::StatusCode::UNKNOWN:
+ return "UNKNOWN";
+ case libtextclassifier3::StatusCode::INVALID_ARGUMENT:
+ return "INVALID_ARGUMENT";
+ case libtextclassifier3::StatusCode::DEADLINE_EXCEEDED:
+ return "DEADLINE_EXCEEDED";
+ case libtextclassifier3::StatusCode::NOT_FOUND:
+ return "NOT_FOUND";
+ case libtextclassifier3::StatusCode::ALREADY_EXISTS:
+ return "ALREADY_EXISTS";
+ case libtextclassifier3::StatusCode::PERMISSION_DENIED:
+ return "PERMISSION_DENIED";
+ case libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED:
+ return "RESOURCE_EXHAUSTED";
+ case libtextclassifier3::StatusCode::FAILED_PRECONDITION:
+ return "FAILED_PRECONDITION";
+ case libtextclassifier3::StatusCode::ABORTED:
+ return "ABORTED";
+ case libtextclassifier3::StatusCode::OUT_OF_RANGE:
+ return "OUT_OF_RANGE";
+ case libtextclassifier3::StatusCode::UNIMPLEMENTED:
+ return "UNIMPLEMENTED";
+ case libtextclassifier3::StatusCode::INTERNAL:
+ return "INTERNAL";
+ case libtextclassifier3::StatusCode::UNAVAILABLE:
+ return "UNAVAILABLE";
+ case libtextclassifier3::StatusCode::DATA_LOSS:
+ return "DATA_LOSS";
+ case libtextclassifier3::StatusCode::UNAUTHENTICATED:
+ return "UNAUTHENTICATED";
+ default:
+ return "";
+ }
+}
+
+std::string ProtoStatusCodeToString(StatusProto::Code code) {
+ switch (code) {
+ case StatusProto::OK:
+ return "OK";
+ case StatusProto::UNKNOWN:
+ return "UNKNOWN";
+ case StatusProto::INVALID_ARGUMENT:
+ return "INVALID_ARGUMENT";
+ case StatusProto::NOT_FOUND:
+ return "NOT_FOUND";
+ case StatusProto::ALREADY_EXISTS:
+ return "ALREADY_EXISTS";
+ case StatusProto::OUT_OF_SPACE:
+ return "OUT_OF_SPACE";
+ case StatusProto::FAILED_PRECONDITION:
+ return "FAILED_PRECONDITION";
+ case StatusProto::ABORTED:
+ return "ABORTED";
+ case StatusProto::INTERNAL:
+ return "INTERNAL";
+ case StatusProto::WARNING_DATA_LOSS:
+ return "WARNING_DATA_LOSS";
+ default:
+ return "";
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 25f6249..7d8e0cb 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -15,27 +15,40 @@
#ifndef ICING_TESTING_COMMON_MATCHERS_H_
#define ICING_TESTING_COMMON_MATCHERS_H_
+#include <algorithm>
+#include <cinttypes>
+#include <cmath>
+#include <string>
+#include <vector>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/status_macros.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
-#include "icing/util/status-macros.h"
+#include "icing/scoring/scored-document-hit.h"
namespace icing {
namespace lib {
// Used to match Token(Token::Type type, std::string_view text)
MATCHER_P2(EqualsToken, type, text, "") {
+ std::string arg_string(arg.text.data(), arg.text.length());
if (arg.type != type || arg.text != text) {
*result_listener << IcingStringUtil::StringPrintf(
"(Expected: type=%d, text=\"%s\". Actual: type=%d, text=\"%s\")", type,
- &text[0], arg.type, arg.text.data());
+ text, arg.type, arg_string.c_str());
return false;
}
return true;
@@ -46,30 +59,199 @@ MATCHER_P2(EqualsDocHitInfo, document_id, section_ids, "") {
const DocHitInfo& actual = arg;
SectionIdMask section_mask = kSectionIdMaskNone;
for (SectionId section_id : section_ids) {
- section_mask |= 1U << section_id;
+ section_mask |= UINT64_C(1) << section_id;
}
*result_listener << IcingStringUtil::StringPrintf(
- "(actual is {document_id=%d, section_mask=%d}, but expected was "
- "{document_id=%d, section_mask=%d}.)",
+ "(actual is {document_id=%d, section_mask=%" PRIu64
+ "}, but expected was "
+ "{document_id=%d, section_mask=%" PRIu64 "}.)",
actual.document_id(), actual.hit_section_ids_mask(), document_id,
section_mask);
return actual.document_id() == document_id &&
actual.hit_section_ids_mask() == section_mask;
}
+// Used to match a DocHitInfoIterator::CallStats
+MATCHER_P5(EqualsDocHitInfoIteratorCallStats, num_leaf_advance_calls_lite_index,
+ num_leaf_advance_calls_main_index,
+ num_leaf_advance_calls_integer_index,
+ num_leaf_advance_calls_no_index, num_blocks_inspected, "") {
+ const DocHitInfoIterator::CallStats& actual = arg;
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {num_leaf_advance_calls_lite_index=%d, "
+ "num_leaf_advance_calls_main_index=%d, "
+ "num_leaf_advance_calls_integer_index=%d, "
+ "num_leaf_advance_calls_no_index=%d, num_blocks_inspected=%d}, but "
+ "expected was {num_leaf_advance_calls_lite_index=%d, "
+ "num_leaf_advance_calls_main_index=%d, "
+ "num_leaf_advance_calls_integer_index=%d, "
+ "num_leaf_advance_calls_no_index=%d, num_blocks_inspected=%d}.)",
+ actual.num_leaf_advance_calls_lite_index,
+ actual.num_leaf_advance_calls_main_index,
+ actual.num_leaf_advance_calls_integer_index,
+ actual.num_leaf_advance_calls_no_index, actual.num_blocks_inspected,
+ num_leaf_advance_calls_lite_index, num_leaf_advance_calls_main_index,
+ num_leaf_advance_calls_integer_index, num_leaf_advance_calls_no_index,
+ num_blocks_inspected);
+ return actual.num_leaf_advance_calls_lite_index ==
+ num_leaf_advance_calls_lite_index &&
+ actual.num_leaf_advance_calls_main_index ==
+ num_leaf_advance_calls_main_index &&
+ actual.num_leaf_advance_calls_integer_index ==
+ num_leaf_advance_calls_integer_index &&
+ actual.num_leaf_advance_calls_no_index ==
+ num_leaf_advance_calls_no_index &&
+ actual.num_blocks_inspected == num_blocks_inspected;
+}
+
+struct ExtractTermFrequenciesResult {
+ std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies = {0};
+ SectionIdMask section_mask = kSectionIdMaskNone;
+};
+// Extracts the term frequencies represented by the section_ids_tf_map.
+// Returns:
+// - a SectionIdMask representing all sections that appear as entries in the
+// map, even if they have an entry with term_frequency==0
+// - an array representing the term frequencies for each section. Sections not
+// present in section_ids_tf_map have a term frequency of 0.
+ExtractTermFrequenciesResult ExtractTermFrequencies(
+ const std::unordered_map<SectionId, Hit::TermFrequency>&
+ section_ids_tf_map);
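+
+// Illustrative example (values are hypothetical): a section_ids_tf_map of
+// {{/*section_id=*/2, /*tf=*/5}, {/*section_id=*/4, /*tf=*/1}} yields
+// term_frequencies[2] == 5, term_frequencies[4] == 1, all other entries 0,
+// and section_mask == ((UINT64_C(1) << 2) | (UINT64_C(1) << 4)).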
+
+struct CheckTermFrequencyResult {
+ std::string expected_term_frequencies_str;
+ std::string actual_term_frequencies_str;
+ bool term_frequencies_match = true;
+};
+// Checks that the term frequencies in actual_term_frequencies match those in
+// expected_term_frequencies; sections with no expected usage should hold a
+// term frequency of 0.
+// Returns:
+// - a bool indicating if the term frequencies match
+// - debug strings representing the contents of the actual and expected term
+//   frequency arrays.
+CheckTermFrequencyResult CheckTermFrequency(
+ const std::array<Hit::TermFrequency, kTotalNumSections>&
+ expected_term_frequencies,
+ const std::array<Hit::TermFrequency, kTotalNumSections>&
+ actual_term_frequencies);
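+
+// Illustrative example (values are hypothetical): if expected_term_frequencies
+// begins {5, 0, ...} and actual_term_frequencies begins {5, 3, ...}, then
+// term_frequencies_match is false and the *_str fields hold debug strings like
+// "[5,0,...]" and "[5,3,...]" built with absl_ports::StrJoin.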
+
+// Used to match a DocHitInfo
+MATCHER_P2(EqualsDocHitInfoWithTermFrequency, document_id,
+ section_ids_to_term_frequencies_map, "") {
+ const DocHitInfoTermFrequencyPair& actual = arg;
+ std::array<Hit::TermFrequency, kTotalNumSections> actual_tf_array;
+ for (SectionId section_id = 0; section_id < kTotalNumSections; ++section_id) {
+ actual_tf_array[section_id] = actual.hit_term_frequency(section_id);
+ }
+ ExtractTermFrequenciesResult expected =
+ ExtractTermFrequencies(section_ids_to_term_frequencies_map);
+ CheckTermFrequencyResult check_tf_result =
+ CheckTermFrequency(expected.term_frequencies, actual_tf_array);
+
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {document_id=%d, section_mask=%" PRIu64
+ ", term_frequencies=%s}, but expected was "
+ "{document_id=%d, section_mask=%" PRIu64 ", term_frequencies=%s}.)",
+ actual.doc_hit_info().document_id(),
+ actual.doc_hit_info().hit_section_ids_mask(),
+ check_tf_result.actual_term_frequencies_str.c_str(), document_id,
+ expected.section_mask,
+ check_tf_result.expected_term_frequencies_str.c_str());
+ return actual.doc_hit_info().document_id() == document_id &&
+ actual.doc_hit_info().hit_section_ids_mask() ==
+ expected.section_mask &&
+ check_tf_result.term_frequencies_match;
+}
+
+MATCHER_P2(EqualsTermMatchInfo, term, section_ids_to_term_frequencies_map, "") {
+ const TermMatchInfo& actual = arg;
+ std::string term_str(term);
+ ExtractTermFrequenciesResult expected =
+ ExtractTermFrequencies(section_ids_to_term_frequencies_map);
+ CheckTermFrequencyResult check_tf_result =
+ CheckTermFrequency(expected.term_frequencies, actual.term_frequencies);
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {term=%s, section_mask=%" PRIu64
+ ", term_frequencies=%s}, but expected was "
+ "{term=%s, section_mask=%" PRIu64 ", term_frequencies=%s}.)",
+ actual.term.data(), actual.section_ids_mask,
+ check_tf_result.actual_term_frequencies_str.c_str(), term_str.data(),
+ expected.section_mask,
+ check_tf_result.expected_term_frequencies_str.c_str());
+ return actual.term == term &&
+ actual.section_ids_mask == expected.section_mask &&
+ check_tf_result.term_frequencies_match;
+}
+
+class ScoredDocumentHitFormatter {
+ public:
+ std::string operator()(const ScoredDocumentHit& scored_document_hit) {
+ return IcingStringUtil::StringPrintf(
+ "(document_id=%d, hit_section_id_mask=%" PRId64 ", score=%.2f)",
+ scored_document_hit.document_id(),
+ scored_document_hit.hit_section_id_mask(), scored_document_hit.score());
+ }
+};
+
+class ScoredDocumentHitEqualComparator {
+ public:
+ bool operator()(const ScoredDocumentHit& lhs,
+ const ScoredDocumentHit& rhs) const {
+ return lhs.document_id() == rhs.document_id() &&
+ lhs.hit_section_id_mask() == rhs.hit_section_id_mask() &&
+ std::fabs(lhs.score() - rhs.score()) < 1e-6;
+ }
+};
+
// Used to match a ScoredDocumentHit
MATCHER_P(EqualsScoredDocumentHit, expected_scored_document_hit, "") {
- if (arg.document_id() != expected_scored_document_hit.document_id() ||
- arg.hit_section_id_mask() !=
- expected_scored_document_hit.hit_section_id_mask() ||
- arg.score() != expected_scored_document_hit.score()) {
+ ScoredDocumentHitEqualComparator equal_comparator;
+ if (!equal_comparator(arg, expected_scored_document_hit)) {
+ ScoredDocumentHitFormatter formatter;
+ *result_listener << "Expected: " << formatter(expected_scored_document_hit)
+ << ". Actual: " << formatter(arg);
+ return false;
+ }
+ return true;
+}
+
+// Used to match a JoinedScoredDocumentHit
+MATCHER_P(EqualsJoinedScoredDocumentHit, expected_joined_scored_document_hit,
+ "") {
+ ScoredDocumentHitEqualComparator equal_comparator;
+ if (std::fabs(arg.final_score() -
+ expected_joined_scored_document_hit.final_score()) > 1e-6 ||
+ !equal_comparator(
+ arg.parent_scored_document_hit(),
+ expected_joined_scored_document_hit.parent_scored_document_hit()) ||
+ arg.child_scored_document_hits().size() !=
+ expected_joined_scored_document_hit.child_scored_document_hits()
+ .size() ||
+ !std::equal(
+ arg.child_scored_document_hits().cbegin(),
+ arg.child_scored_document_hits().cend(),
+ expected_joined_scored_document_hit.child_scored_document_hits()
+ .cbegin(),
+ equal_comparator)) {
+ ScoredDocumentHitFormatter formatter;
+
*result_listener << IcingStringUtil::StringPrintf(
- "Expected: document_id=%d, hit_section_id_mask=%d, score=%.2f. Actual: "
- "document_id=%d, hit_section_id_mask=%d, score=%.2f",
- expected_scored_document_hit.document_id(),
- expected_scored_document_hit.hit_section_id_mask(),
- expected_scored_document_hit.score(), arg.document_id(),
- arg.hit_section_id_mask(), arg.score());
+ "Expected: final_score=%.2f, parent_scored_document_hit=%s, "
+ "child_scored_document_hits=[%s]. Actual: final_score=%.2f, "
+ "parent_scored_document_hit=%s, child_scored_document_hits=[%s]",
+ expected_joined_scored_document_hit.final_score(),
+ formatter(
+ expected_joined_scored_document_hit.parent_scored_document_hit())
+ .c_str(),
+ absl_ports::StrJoin(
+ expected_joined_scored_document_hit.child_scored_document_hits(),
+ ",", formatter)
+ .c_str(),
+ arg.final_score(), formatter(arg.parent_scored_document_hit()).c_str(),
+ absl_ports::StrJoin(arg.child_scored_document_hits(), ",", formatter)
+ .c_str());
return false;
}
return true;
@@ -79,7 +261,6 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
const SchemaStore::SetSchemaResult& actual = arg;
if (actual.success == expected.success &&
- actual.index_incompatible == expected.index_incompatible &&
actual.old_schema_type_ids_changed ==
expected.old_schema_type_ids_changed &&
actual.schema_types_deleted_by_name ==
@@ -89,7 +270,14 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
actual.schema_types_incompatible_by_name ==
expected.schema_types_incompatible_by_name &&
actual.schema_types_incompatible_by_id ==
- expected.schema_types_incompatible_by_id) {
+ expected.schema_types_incompatible_by_id &&
+ actual.schema_types_new_by_name == expected.schema_types_new_by_name &&
+ actual.schema_types_changed_fully_compatible_by_name ==
+ expected.schema_types_changed_fully_compatible_by_name &&
+ actual.schema_types_index_incompatible_by_name ==
+ expected.schema_types_index_incompatible_by_name &&
+ actual.schema_types_join_incompatible_by_name ==
+ expected.schema_types_join_incompatible_by_name) {
return true;
}
@@ -149,81 +337,133 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
absl_ports::NumberFormatter()),
"]");
+ // Format schema_types_new_by_name
+ std::string actual_schema_types_new_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(actual.schema_types_new_by_name, ","), "]");
+
+ std::string expected_schema_types_new_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(expected.schema_types_new_by_name, ","), "]");
+
+ // Format schema_types_changed_fully_compatible_by_name
+ std::string actual_schema_types_changed_fully_compatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(
+ actual.schema_types_changed_fully_compatible_by_name, ","),
+ "]");
+
+ std::string expected_schema_types_changed_fully_compatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(
+ expected.schema_types_changed_fully_compatible_by_name, ","),
+ "]");
+
+ // Format schema_types_index_incompatible_by_name
+ std::string actual_schema_types_index_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.schema_types_index_incompatible_by_name,
+ ","),
+ "]");
+
+ std::string expected_schema_types_index_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.schema_types_index_incompatible_by_name,
+ ","),
+ "]");
+
+ // Format schema_types_join_incompatible_by_name
+ std::string actual_schema_types_join_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.schema_types_join_incompatible_by_name,
+ ","),
+ "]");
+
+ std::string expected_schema_types_join_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.schema_types_join_incompatible_by_name,
+ ","),
+ "]");
+
*result_listener << IcingStringUtil::StringPrintf(
"\nExpected {\n"
"\tsuccess=%d,\n"
- "\tindex_incompatible=%d,\n"
"\told_schema_type_ids_changed=%s,\n"
"\tschema_types_deleted_by_name=%s,\n"
"\tschema_types_deleted_by_id=%s,\n"
"\tschema_types_incompatible_by_name=%s,\n"
"\tschema_types_incompatible_by_id=%s\n"
+ "\tschema_types_new_by_name=%s,\n"
+ "\tschema_types_changed_fully_compatible_by_name=%s\n"
+ "\tschema_types_index_incompatible_by_name=%s,\n"
+ "\tschema_types_join_incompatible_by_name=%s\n"
"}\n"
"Actual {\n"
"\tsuccess=%d,\n"
- "\tindex_incompatible=%d,\n"
"\told_schema_type_ids_changed=%s,\n"
"\tschema_types_deleted_by_name=%s,\n"
"\tschema_types_deleted_by_id=%s,\n"
"\tschema_types_incompatible_by_name=%s,\n"
"\tschema_types_incompatible_by_id=%s\n"
+ "\tschema_types_new_by_name=%s,\n"
+ "\tschema_types_changed_fully_compatible_by_name=%s\n"
+ "\tschema_types_index_incompatible_by_name=%s,\n"
+ "\tschema_types_join_incompatible_by_name=%s\n"
"}\n",
- expected.success, expected.index_incompatible,
- expected_old_schema_type_ids_changed.c_str(),
+ expected.success, expected_old_schema_type_ids_changed.c_str(),
expected_schema_types_deleted_by_name.c_str(),
expected_schema_types_deleted_by_id.c_str(),
expected_schema_types_incompatible_by_name.c_str(),
- expected_schema_types_incompatible_by_id.c_str(), actual.success,
- actual.index_incompatible, actual_old_schema_type_ids_changed.c_str(),
+ expected_schema_types_incompatible_by_id.c_str(),
+ expected_schema_types_new_by_name.c_str(),
+ expected_schema_types_changed_fully_compatible_by_name.c_str(),
+ expected_schema_types_index_incompatible_by_name.c_str(),
+ expected_schema_types_join_incompatible_by_name.c_str(), actual.success,
+ actual_old_schema_type_ids_changed.c_str(),
actual_schema_types_deleted_by_name.c_str(),
actual_schema_types_deleted_by_id.c_str(),
actual_schema_types_incompatible_by_name.c_str(),
- actual_schema_types_incompatible_by_id.c_str());
-
+ actual_schema_types_incompatible_by_id.c_str(),
+ actual_schema_types_new_by_name.c_str(),
+ actual_schema_types_changed_fully_compatible_by_name.c_str(),
+ actual_schema_types_index_incompatible_by_name.c_str(),
+ actual_schema_types_join_incompatible_by_name.c_str());
return false;
}
-std::string StatusCodeToString(libtextclassifier3::StatusCode code) {
- switch (code) {
- case libtextclassifier3::StatusCode::OK:
- return "OK";
- case libtextclassifier3::StatusCode::CANCELLED:
- return "CANCELLED";
- case libtextclassifier3::StatusCode::UNKNOWN:
- return "UNKNOWN";
- case libtextclassifier3::StatusCode::INVALID_ARGUMENT:
- return "INVALID_ARGUMENT";
- case libtextclassifier3::StatusCode::DEADLINE_EXCEEDED:
- return "DEADLINE_EXCEEDED";
- case libtextclassifier3::StatusCode::NOT_FOUND:
- return "NOT_FOUND";
- case libtextclassifier3::StatusCode::ALREADY_EXISTS:
- return "ALREADY_EXISTS";
- case libtextclassifier3::StatusCode::PERMISSION_DENIED:
- return "PERMISSION_DENIED";
- case libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED:
- return "RESOURCE_EXHAUSTED";
- case libtextclassifier3::StatusCode::FAILED_PRECONDITION:
- return "FAILED_PRECONDITION";
- case libtextclassifier3::StatusCode::ABORTED:
- return "ABORTED";
- case libtextclassifier3::StatusCode::OUT_OF_RANGE:
- return "OUT_OF_RANGE";
- case libtextclassifier3::StatusCode::UNIMPLEMENTED:
- return "UNIMPLEMENTED";
- case libtextclassifier3::StatusCode::INTERNAL:
- return "INTERNAL";
- case libtextclassifier3::StatusCode::UNAVAILABLE:
- return "UNAVAILABLE";
- case libtextclassifier3::StatusCode::DATA_LOSS:
- return "DATA_LOSS";
- case libtextclassifier3::StatusCode::UNAUTHENTICATED:
- return "UNAUTHENTICATED";
- default:
- return "";
- }
+MATCHER_P3(EqualsSectionMetadata, expected_id, expected_property_path,
+ expected_property_config_proto, "") {
+ const SectionMetadata& actual = arg;
+ return actual.id == expected_id && actual.path == expected_property_path &&
+ actual.data_type == expected_property_config_proto.data_type() &&
+ actual.tokenizer ==
+ expected_property_config_proto.string_indexing_config()
+ .tokenizer_type() &&
+ actual.term_match_type ==
+ expected_property_config_proto.string_indexing_config()
+ .term_match_type() &&
+ actual.numeric_match_type ==
+ expected_property_config_proto.integer_indexing_config()
+ .numeric_match_type();
+}
+
+MATCHER_P3(EqualsJoinablePropertyMetadata, expected_id, expected_property_path,
+ expected_property_config_proto, "") {
+ const JoinablePropertyMetadata& actual = arg;
+ return actual.id == expected_id && actual.path == expected_property_path &&
+ actual.data_type == expected_property_config_proto.data_type() &&
+ actual.value_type ==
+ expected_property_config_proto.joinable_config().value_type();
}
+std::string StatusCodeToString(libtextclassifier3::StatusCode code);
+
+std::string ProtoStatusCodeToString(StatusProto::Code code);
+
MATCHER(IsOk, "") {
libtextclassifier3::StatusAdapter adapter(arg);
if (adapter.status().ok()) {
@@ -274,6 +514,68 @@ MATCHER_P2(StatusIs, status_code, error_matcher, "") {
result_listener);
}
+MATCHER(ProtoIsOk, "") {
+ if (arg.code() == StatusProto::OK) {
+ return true;
+ }
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected OK, actual was (%s:%s)",
+ ProtoStatusCodeToString(arg.code()).c_str(), arg.message().c_str());
+ return false;
+}
+
+MATCHER_P(ProtoStatusIs, status_code, "") {
+ if (arg.code() == status_code) {
+ return true;
+ }
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected (%s:), actual was (%s:%s)",
+ ProtoStatusCodeToString(status_code).c_str(),
+ ProtoStatusCodeToString(arg.code()).c_str(), arg.message().c_str());
+ return false;
+}
+
+MATCHER_P2(ProtoStatusIs, status_code, error_matcher, "") {
+ if (arg.code() != status_code) {
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected (%s:), actual was (%s:%s)",
+ ProtoStatusCodeToString(status_code).c_str(),
+ ProtoStatusCodeToString(arg.code()).c_str(), arg.message().c_str());
+ return false;
+ }
+ return ExplainMatchResult(error_matcher, arg.message(), result_listener);
+}
+
+MATCHER_P(EqualsSearchResultIgnoreStatsAndScores, expected, "") {
+ SearchResultProto actual_copy = arg;
+ actual_copy.clear_query_stats();
+ actual_copy.clear_debug_info();
+ for (SearchResultProto::ResultProto& result :
+ *actual_copy.mutable_results()) {
+ // Joined results
+ for (SearchResultProto::ResultProto& joined_result :
+ *result.mutable_joined_results()) {
+ joined_result.clear_score();
+ }
+ result.clear_score();
+ }
+
+ SearchResultProto expected_copy = expected;
+ expected_copy.clear_query_stats();
+ expected_copy.clear_debug_info();
+ for (SearchResultProto::ResultProto& result :
+ *expected_copy.mutable_results()) {
+ // Joined results
+ for (SearchResultProto::ResultProto& joined_result :
+ *result.mutable_joined_results()) {
+ joined_result.clear_score();
+ }
+ result.clear_score();
+ }
+ return ExplainMatchResult(portable_equals_proto::EqualsProto(expected_copy),
+ actual_copy, result_listener);
+}
+
// TODO(tjbarron) Remove this once icing has switched to depend on TC3 Status
#define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \
ICING_STATUS_MACROS_CONCAT_IMPL(x, y)
@@ -290,6 +592,10 @@ MATCHER_P2(StatusIs, status_code, error_matcher, "") {
ICING_ASSERT_OK(statusor.status()); \
lhs = std::move(statusor).ValueOrDie()
+#define ICING_ASSERT_HAS_VALUE_AND_ASSIGN(lhs, rexpr) \
+ ASSERT_TRUE(rexpr); \
+ lhs = rexpr.value()
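+
+// Example usage of the macro above (illustrative; maybe_hit is a hypothetical
+// std::optional<Hit>):
+//   ICING_ASSERT_HAS_VALUE_AND_ASSIGN(Hit hit, maybe_hit);
+// Note that rexpr is evaluated twice (once by ASSERT_TRUE and once by
+// value()), so pass a variable rather than an expression with side effects.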
+
} // namespace lib
} // namespace icing
diff --git a/icing/testing/fake-clock.h b/icing/testing/fake-clock.h
index 54b56c3..f451753 100644
--- a/icing/testing/fake-clock.h
+++ b/icing/testing/fake-clock.h
@@ -20,6 +20,22 @@
namespace icing {
namespace lib {
+// A fake timer class for tests. It returns a fixed elapsed time that tests
+// can set explicitly via SetElapsedMilliseconds.
+class FakeTimer : public Timer {
+ public:
+ int64_t GetElapsedMilliseconds() const override {
+ return fake_elapsed_milliseconds_;
+ }
+
+ void SetElapsedMilliseconds(int64_t elapsed_milliseconds) {
+ fake_elapsed_milliseconds_ = elapsed_milliseconds;
+ }
+
+ private:
+ int64_t fake_elapsed_milliseconds_ = 0;
+};
+
// Wrapper around real-time clock functions. This is separated primarily so
// tests can override this clock and inject it into the class under test.
class FakeClock : public Clock {
@@ -30,8 +46,17 @@ class FakeClock : public Clock {
milliseconds_ = milliseconds;
}
+ std::unique_ptr<Timer> GetNewTimer() const override {
+ return std::make_unique<FakeTimer>(fake_timer_);
+ }
+
+ void SetTimerElapsedMilliseconds(int64_t timer_elapsed_milliseconds) {
+ fake_timer_.SetElapsedMilliseconds(timer_elapsed_milliseconds);
+ }
+
private:
int64_t milliseconds_ = 0;
+ FakeTimer fake_timer_;
};
} // namespace lib
diff --git a/icing/testing/fake-clock_test.cc b/icing/testing/fake-clock_test.cc
index 3c75ae9..4b36727 100644
--- a/icing/testing/fake-clock_test.cc
+++ b/icing/testing/fake-clock_test.cc
@@ -24,7 +24,7 @@ namespace {
using ::testing::Eq;
-TEST(FakeClockTest, GetSetOk) {
+TEST(FakeClockTest, GetSetSystemTimeOk) {
FakeClock fake_clock;
EXPECT_THAT(fake_clock.GetSystemTimeMilliseconds(), Eq(0));
@@ -35,6 +35,17 @@ TEST(FakeClockTest, GetSetOk) {
EXPECT_THAT(fake_clock.GetSystemTimeMilliseconds(), Eq(-1));
}
+TEST(FakeClockTest, GetSetTimerElapsedTimeOk) {
+ FakeClock fake_clock;
+ EXPECT_THAT(fake_clock.GetNewTimer()->GetElapsedMilliseconds(), Eq(0));
+
+ fake_clock.SetTimerElapsedMilliseconds(10);
+ EXPECT_THAT(fake_clock.GetNewTimer()->GetElapsedMilliseconds(), Eq(10));
+
+ fake_clock.SetTimerElapsedMilliseconds(-1);
+ EXPECT_THAT(fake_clock.GetNewTimer()->GetElapsedMilliseconds(), Eq(-1));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc
new file mode 100644
index 0000000..7ad8a64
--- /dev/null
+++ b/icing/testing/hit-test-utils.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/hit-test-utils.h"
+
+namespace icing {
+namespace lib {
+
+// Returns a hit that has a delta of desired_byte_length from last_hit.
+Hit CreateHit(Hit last_hit, int desired_byte_length) {
+ Hit hit = (last_hit.section_id() == kMinSectionId)
+ ? Hit(kMaxSectionId, last_hit.document_id() + 1,
+ last_hit.term_frequency())
+ : Hit(last_hit.section_id() - 1, last_hit.document_id(),
+ last_hit.term_frequency());
+ uint8_t buf[5];
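+ // VarInt::Encode writes at most 5 bytes for a 32-bit delta. Keep stepping
+ // the hit forward (to the previous section id, or to the next document id
+ // once the sections are exhausted) until the delta from last_hit encodes to
+ // at least desired_byte_length bytes.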
+ while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
+ desired_byte_length) {
+ hit = (hit.section_id() == kMinSectionId)
+ ? Hit(kMaxSectionId, hit.document_id() + 1, hit.term_frequency())
+ : Hit(hit.section_id() - 1, hit.document_id(),
+ hit.term_frequency());
+ }
+ return hit;
+}
+
+// Returns a vector of num_hits Hits with the first hit starting at start_docid
+// and with desired_byte_length deltas.
+std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
+ int desired_byte_length) {
+ std::vector<Hit> hits;
+ if (num_hits < 1) {
+ return hits;
+ }
+ hits.push_back(Hit(/*section_id=*/1, /*document_id=*/start_docid,
+ Hit::kDefaultTermFrequency));
+ while (hits.size() < num_hits) {
+ hits.push_back(CreateHit(hits.back(), desired_byte_length));
+ }
+ return hits;
+}
+
+std::vector<Hit> CreateHits(int num_hits, int desired_byte_length) {
+ return CreateHits(/*start_docid=*/0, num_hits, desired_byte_length);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/hit-test-utils.h b/icing/testing/hit-test-utils.h
new file mode 100644
index 0000000..e236ec0
--- /dev/null
+++ b/icing/testing/hit-test-utils.h
@@ -0,0 +1,43 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_HIT_TEST_UTILS_H_
+#define ICING_TESTING_HIT_TEST_UTILS_H_
+
+#include <vector>
+
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Returns a hit that has a delta of desired_byte_length from last_hit.
+Hit CreateHit(Hit last_hit, int desired_byte_length);
+
+// Returns a vector of num_hits Hits with the first hit starting at start_docid
+// and with desired_byte_length deltas.
+std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
+ int desired_byte_length);
+
+// Returns a vector of num_hits Hits with the first hit starting at 0 and each
+// with desired_byte_length deltas.
+std::vector<Hit> CreateHits(int num_hits, int desired_byte_length);
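+
+// Illustrative example (values are hypothetical):
+//   CreateHits(/*num_hits=*/3, /*desired_byte_length=*/2) returns 3 hits, the
+//   first on document id 0, where each consecutive pair of hit values has a
+//   delta that takes 2 bytes to VarInt-encode.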
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_HIT_TEST_UTILS_H_
diff --git a/icing/helpers/icu/icu-data-file-helper.cc b/icing/testing/icu-data-file-helper.cc
index 5cf6a1d..aaeb738 100644
--- a/icing/helpers/icu/icu-data-file-helper.cc
+++ b/icing/testing/icu-data-file-helper.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/testing/icu-data-file-helper.h"
#include <sys/mman.h>
@@ -49,8 +49,6 @@ libtextclassifier3::Status SetUpICUDataFile(
return absl_ports::InternalError("Unable to open file at provided path");
}
- // TODO(samzheng): figure out why icing::MemoryMappedFile causes
- // segmentation fault here.
const void* data =
mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd.get(), 0);
diff --git a/icing/helpers/icu/icu-data-file-helper.h b/icing/testing/icu-data-file-helper.h
index 90f5bc7..d0276e7 100644
--- a/icing/helpers/icu/icu-data-file-helper.h
+++ b/icing/testing/icu-data-file-helper.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER
-#define ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER
+#ifndef ICING_TESTING_ICU_DATA_FILE_HELPER
+#define ICING_TESTING_ICU_DATA_FILE_HELPER
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -40,4 +40,4 @@ libtextclassifier3::Status SetUpICUDataFile(
} // namespace lib
} // namespace icing
-#endif // ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER
+#endif // ICING_TESTING_ICU_DATA_FILE_HELPER
diff --git a/icing/testing/icu-i18n-test-utils.cc b/icing/testing/icu-i18n-test-utils.cc
index 09878db..50dc26c 100644
--- a/icing/testing/icu-i18n-test-utils.cc
+++ b/icing/testing/icu-i18n-test-utils.cc
@@ -29,7 +29,7 @@ std::string UCharToString(UChar32 uchar) {
uint8_t utf8_buffer[4]; // U8_APPEND writes 0 to 4 bytes
int utf8_index = 0;
- UBool has_error = FALSE;
+ UBool has_error = false;
// utf8_index is advanced to the end of the contents if successful
U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
diff --git a/icing/testing/jni-test-helpers.h b/icing/testing/jni-test-helpers.h
index adc469a..67a98c3 100644
--- a/icing/testing/jni-test-helpers.h
+++ b/icing/testing/jni-test-helpers.h
@@ -15,6 +15,8 @@
#ifndef ICING_TESTING_JNI_TEST_HELPERS_H_
#define ICING_TESTING_JNI_TEST_HELPERS_H_
+#include <memory>
+
#include "icing/jni/jni-cache.h"
#ifdef ICING_REVERSE_JNI_SEGMENTATION
diff --git a/icing/testing/numeric/normal-distribution-number-generator.h b/icing/testing/numeric/normal-distribution-number-generator.h
new file mode 100644
index 0000000..73cdd1f
--- /dev/null
+++ b/icing/testing/numeric/normal-distribution-number-generator.h
@@ -0,0 +1,42 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
+
+#include <cmath>
+#include <random>
+
+#include "icing/testing/numeric/number-generator.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class NormalDistributionNumberGenerator : public NumberGenerator<T> {
+ public:
+ explicit NormalDistributionNumberGenerator(int seed, double mean,
+ double stddev)
+ : NumberGenerator<T>(seed), distribution_(mean, stddev) {}
+
+ T Generate() override { return std::round(distribution_(this->engine_)); }
+
+ private:
+ std::normal_distribution<> distribution_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
diff --git a/icing/testing/numeric/number-generator.h b/icing/testing/numeric/number-generator.h
new file mode 100644
index 0000000..bb601b4
--- /dev/null
+++ b/icing/testing/numeric/number-generator.h
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
+
+#include <random>
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class NumberGenerator {
+ public:
+ virtual ~NumberGenerator() = default;
+
+ virtual T Generate() = 0;
+
+ protected:
+ explicit NumberGenerator(int seed) : engine_(seed) {}
+
+ std::default_random_engine engine_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_NUMBER_GENERATOR_H_
diff --git a/icing/testing/numeric/uniform-distribution-integer-generator.h b/icing/testing/numeric/uniform-distribution-integer-generator.h
new file mode 100644
index 0000000..569eebd
--- /dev/null
+++ b/icing/testing/numeric/uniform-distribution-integer-generator.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+
+#include <random>
+
+#include "icing/testing/numeric/number-generator.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class UniformDistributionIntegerGenerator : public NumberGenerator<T> {
+ public:
+ explicit UniformDistributionIntegerGenerator(int seed, T range_lower,
+ T range_upper)
+ : NumberGenerator<T>(seed), distribution_(range_lower, range_upper) {}
+
+ T Generate() override { return distribution_(this->engine_); }
+
+ private:
+ std::uniform_int_distribution<T> distribution_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
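
A short sketch of the two distributions above in use (a hypothetical snippet; both generators share the engine seeded through the NumberGenerator base):

#include <cstdint>
#include "icing/testing/numeric/normal-distribution-number-generator.h"
#include "icing/testing/numeric/uniform-distribution-integer-generator.h"

void GeneratorSketch() {
  using icing::lib::NormalDistributionNumberGenerator;
  using icing::lib::UniformDistributionIntegerGenerator;

  // Deterministic for a fixed seed.
  UniformDistributionIntegerGenerator<int64_t> uniform(
      /*seed=*/42, /*range_lower=*/0, /*range_upper=*/100);
  NormalDistributionNumberGenerator<int64_t> normal(
      /*seed=*/42, /*mean=*/50.0, /*stddev=*/10.0);

  int64_t u = uniform.Generate();  // uniform draw in [0, 100]
  int64_t n = normal.Generate();   // rounded draw from N(50, 10)
}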
diff --git a/icing/testing/random-string.cc b/icing/testing/random-string.cc
new file mode 100644
index 0000000..27f83bc
--- /dev/null
+++ b/icing/testing/random-string.cc
@@ -0,0 +1,54 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/random-string.h"
+
+namespace icing {
+namespace lib {
+
+std::vector<std::string> GenerateUniqueTerms(int num_terms) {
+ char before_a = 'a' - 1;
+ std::string term(1, before_a);
+ std::vector<std::string> terms;
+ int current_char = 0;
+ for (int permutation = 0; permutation < num_terms; ++permutation) {
+ if (term[current_char] != 'z') {
+ ++term[current_char];
+ } else {
+ if (current_char < term.length() - 1) {
+ // The string currently looks something like "zzzaa".
+ // 1. Find the first char after this one that isn't 'z'.
+ current_char = term.find_first_not_of('z', current_char);
+ if (current_char != std::string::npos) {
+ // 2. Increment that character
+ ++term[current_char];
+
+ // 3. Set every character prior to current_char to 'a'
+ term.replace(0, current_char, current_char, 'a');
+ } else {
+ // Every character in this string is a 'z'. We need to grow.
+ term = std::string(term.length() + 1, 'a');
+ }
+ } else {
+ term = std::string(term.length() + 1, 'a');
+ }
+ current_char = 0;
+ }
+ terms.push_back(term);
+ }
+ return terms;
+}
+
+} // namespace lib
+} // namespace icing
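
To make the carry logic above concrete, a worked trace (this only exercises the function as declared in random-string.h; the expected values match the expectations in random-string_test.cc below):

#include <string>
#include <vector>
#include "icing/testing/random-string.h"

void UniqueTermsSketch() {
  // GenerateUniqueTerms treats the term as a little-endian counter over
  // 'a'..'z'. Carry example: to increment past "zzzaa", find the first
  // non-'z' (index 3), bump it to 'b', and reset the 'z' prefix to 'a's,
  // yielding "aaaba".
  std::vector<std::string> terms = icing::lib::GenerateUniqueTerms(28);
  // terms[0] == "a", terms[25] == "z", terms[26] == "aa", terms[27] == "ba"
}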
diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h
index 1510e15..a313c1c 100644
--- a/icing/testing/random-string.h
+++ b/icing/testing/random-string.h
@@ -15,6 +15,7 @@
#ifndef ICING_TESTING_RANDOM_STRING_H_
#define ICING_TESTING_RANDOM_STRING_H_
+#include <algorithm>
#include <random>
#include <string>
@@ -24,10 +25,19 @@ namespace lib {
inline constexpr std::string_view kAlNumAlphabet =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+// The average English word is 4.7 characters long.
+inline constexpr int kAvgTokenLen = 5;
+// Made-up value. This yields a fairly reasonable language: the majority of
+// generated words are 3-9 characters, ~3% of words are >=20 characters, and
+// the longest are 27 characters (roughly consistent with the longest
+// non-contrived English words; see
+// https://en.wikipedia.org/wiki/Longest_word_in_English).
+inline constexpr int kTokenStdDev = 7;
+
template <typename Gen>
std::string RandomString(const std::string_view alphabet, size_t len,
Gen* gen) {
- std::uniform_int_distribution<size_t> uniform(0u, alphabet.size());
+ std::uniform_int_distribution<size_t> uniform(0u, alphabet.size() - 1);
std::string result(len, '\0');
std::generate(
std::begin(result), std::end(result),
@@ -36,6 +46,26 @@ std::string RandomString(const std::string_view alphabet, size_t len,
return result;
}
+// Creates a vector containing num_words randomly-generated words for use by
+// documents.
+template <typename Rand>
+std::vector<std::string> CreateLanguages(int num_words, Rand* r) {
+ std::vector<std::string> language;
+ std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
+ while (--num_words >= 0) {
+ int word_length = 0;
+ while (word_length < 1) {
+ word_length = std::round(norm_dist(*r));
+ }
+ language.push_back(RandomString(kAlNumAlphabet, word_length, r));
+ }
+ return language;
+}
+
+// Returns a vector containing num_terms unique terms. Terms are generated in a
+// deterministic order - "a" through "z", then "aa", "ba", ... "za", "ab",
+// "bb", etc. - with the first character varying fastest.
+std::vector<std::string> GenerateUniqueTerms(int num_terms);
+
} // namespace lib
} // namespace icing
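
A hedged sketch of how RandomString and CreateLanguages compose (the engine type is the caller's choice; std::default_random_engine is assumed here):

#include <random>
#include <string>
#include <vector>
#include "icing/testing/random-string.h"

void RandomStringSketch() {
  std::default_random_engine engine(/*seed=*/1234);
  // A 10-character token drawn uniformly from kAlNumAlphabet.
  std::string token =
      icing::lib::RandomString(icing::lib::kAlNumAlphabet, /*len=*/10, &engine);
  // 1000 words whose lengths follow N(kAvgTokenLen, kTokenStdDev), clamped
  // to at least one character.
  std::vector<std::string> language =
      icing::lib::CreateLanguages(/*num_words=*/1000, &engine);
}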
diff --git a/icing/testing/random-string_test.cc b/icing/testing/random-string_test.cc
new file mode 100644
index 0000000..759fec0
--- /dev/null
+++ b/icing/testing/random-string_test.cc
@@ -0,0 +1,54 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/random-string.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+TEST(RandomStringTest, GenerateUniqueTerms) {
+ EXPECT_THAT(GenerateUniqueTerms(0), IsEmpty());
+ EXPECT_THAT(GenerateUniqueTerms(1), ElementsAre("a"));
+ EXPECT_THAT(GenerateUniqueTerms(4), ElementsAre("a", "b", "c", "d"));
+ EXPECT_THAT(GenerateUniqueTerms(29),
+ ElementsAre("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k",
+ "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v",
+ "w", "x", "y", "z", "aa", "ba", "ca"));
+ EXPECT_THAT(GenerateUniqueTerms(56),
+ ElementsAre("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k",
+ "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v",
+ "w", "x", "y", "z", "aa", "ba", "ca", "da", "ea",
+ "fa", "ga", "ha", "ia", "ja", "ka", "la", "ma", "na",
+ "oa", "pa", "qa", "ra", "sa", "ta", "ua", "va", "wa",
+ "xa", "ya", "za", "ab", "bb", "cb", "db"));
+ EXPECT_THAT(GenerateUniqueTerms(56).at(54), Eq("cb"));
+ EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26), Eq("aa"));
+ EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26 * 27), Eq("aaa"));
+ EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26 * 27 - 6), Eq("uz"));
+ EXPECT_THAT(GenerateUniqueTerms(26 * 26 * 26).at(26 * 27 + 5), Eq("faa"));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h
index e733612..8de8d06 100644
--- a/icing/testing/schema-generator.h
+++ b/icing/testing/schema-generator.h
@@ -18,8 +18,8 @@
#include <random>
#include <string>
-#include "icing/proto/schema.proto.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
namespace icing {
namespace lib {
@@ -31,13 +31,16 @@ class ExactStringPropertyGenerator {
prop.set_property_name(name.data(), name.length());
prop.set_data_type(PropertyConfigProto::DataType::STRING);
prop.set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- IndexingConfig* indexing_config = prop.mutable_indexing_config();
- indexing_config->set_term_match_type(TermMatchType::EXACT_ONLY);
- indexing_config->set_tokenizer_type(IndexingConfig::TokenizerType::PLAIN);
+ StringIndexingConfig* string_indexing_config =
+ prop.mutable_string_indexing_config();
+ string_indexing_config->set_term_match_type(TermMatchType::EXACT_ONLY);
+ string_indexing_config->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return prop;
}
};
+// Schema generator with a random number of properties per type.
template <typename Rand, typename PropertyGenerator>
class RandomSchemaGenerator {
public:
@@ -69,6 +72,37 @@ class RandomSchemaGenerator {
PropertyGenerator* prop_generator_;
};
+// Schema generator with a fixed number of properties specified by the caller.
+template <typename PropertyGenerator>
+class SchemaGenerator {
+ public:
+ explicit SchemaGenerator(int num_properties,
+ PropertyGenerator* prop_generator)
+ : num_properties_(num_properties), prop_generator_(prop_generator) {}
+
+ SchemaProto GenerateSchema(int num_types) {
+ SchemaProto schema;
+ while (--num_types >= 0) {
+ SetType(schema.add_types(), "Type" + std::to_string(num_types),
+ num_properties_);
+ }
+ return schema;
+ }
+
+ private:
+ void SetType(SchemaTypeConfigProto* type_config, std::string_view name,
+ int num_properties) const {
+ type_config->set_schema_type(name.data(), name.length());
+ while (--num_properties >= 0) {
+ std::string prop_name = "Prop" + std::to_string(num_properties);
+ (*type_config->add_properties()) = (*prop_generator_)(prop_name);
+ }
+ }
+
+ int num_properties_;
+ PropertyGenerator* prop_generator_;
+};
+
} // namespace lib
} // namespace icing
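
For illustration, a sketch of the fixed-count generator above in use (ExactStringPropertyGenerator's call operator is assumed to take the property name, as SetType's use of (*prop_generator_)(prop_name) suggests):

#include "icing/proto/schema.pb.h"
#include "icing/testing/schema-generator.h"

void SchemaGeneratorSketch() {
  icing::lib::ExactStringPropertyGenerator prop_gen;
  // Every generated type gets exactly three string properties.
  icing::lib::SchemaGenerator<icing::lib::ExactStringPropertyGenerator>
      generator(/*num_properties=*/3, &prop_gen);
  icing::lib::SchemaProto schema = generator.GenerateSchema(/*num_types=*/5);
  // schema now holds types "Type4" .. "Type0", each with "Prop2" .. "Prop0".
}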
diff --git a/icing/testing/snippet-helpers.cc b/icing/testing/snippet-helpers.cc
deleted file mode 100644
index fde0004..0000000
--- a/icing/testing/snippet-helpers.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/testing/snippet-helpers.h"
-
-#include <algorithm>
-#include <string_view>
-
-#include "icing/proto/search.pb.h"
-
-namespace icing {
-namespace lib {
-
-const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
- const std::string& property_name,
- int snippet_index) {
- auto iterator = std::find_if(
- snippet_proto.entries().begin(), snippet_proto.entries().end(),
- [&property_name](const SnippetProto::EntryProto& entry) {
- return entry.property_name() == property_name;
- });
- if (iterator == snippet_proto.entries().end() ||
- iterator->snippet_matches_size() <= snippet_index) {
- return nullptr;
- }
- return &iterator->snippet_matches(snippet_index);
-}
-
-const PropertyProto* GetProperty(const DocumentProto& document,
- const std::string& property_name) {
- const PropertyProto* property = nullptr;
- for (const PropertyProto& prop : document.properties()) {
- if (prop.name() == property_name) {
- property = &prop;
- }
- }
- return property;
-}
-
-std::string GetWindow(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index) {
- const SnippetMatchProto* match =
- GetSnippetMatch(snippet_proto, property_name, snippet_index);
- const PropertyProto* property = GetProperty(document, property_name);
- if (match == nullptr || property == nullptr) {
- return "";
- }
- std::string_view value = property->string_values(match->values_index());
- return std::string(
- value.substr(match->window_position(), match->window_bytes()));
-}
-
-std::string GetMatch(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index) {
- const SnippetMatchProto* match =
- GetSnippetMatch(snippet_proto, property_name, snippet_index);
- const PropertyProto* property = GetProperty(document, property_name);
- if (match == nullptr || property == nullptr) {
- return "";
- }
- std::string_view value = property->string_values(match->values_index());
- return std::string(
- value.substr(match->exact_match_position(), match->exact_match_bytes()));
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/testing/snippet-helpers.h b/icing/testing/snippet-helpers.h
deleted file mode 100644
index 124e421..0000000
--- a/icing/testing/snippet-helpers.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TESTING_SNIPPET_HELPERS_H_
-#define ICING_TESTING_SNIPPET_HELPERS_H_
-
-#include <string>
-
-#include "icing/proto/document.pb.h"
-#include "icing/proto/search.pb.h"
-
-namespace icing {
-namespace lib {
-
-// Retrieve pointer to the snippet_index'th SnippetMatchProto within the
-// EntryProto identified by property_name within snippet_proto.
-// Returns nullptr
-// - if there is no EntryProto within snippet_proto corresponding to
-// property_name.
-// - if there is no SnippetMatchProto at snippet_index within the EntryProto
-const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
- const std::string& property_name,
- int snippet_index);
-
-// Retrieve pointer to the PropertyProto identified by property_name.
-// Returns nullptr if no such property exists.
-const PropertyProto* GetProperty(const DocumentProto& document,
- const std::string& property_name);
-
-// Retrieves the window defined by the SnippetMatchProto returned by
-// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
-// returned by GetProperty(document, property_name).
-// Returns "" if no such property, snippet or window exists.
-std::string GetWindow(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index);
-
-// Retrieves the match defined by the SnippetMatchProto returned by
-// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
-// returned by GetProperty(document, property_name).
-// Returns "" if no such property or snippet exists.
-std::string GetMatch(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index);
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_TESTING_SNIPPET_HELPERS_H_
diff --git a/icing/text_classifier/lib3/utils/base/logging.h b/icing/text_classifier/lib3/utils/base/logging.h
index bf02f65..92d775e 100644
--- a/icing/text_classifier/lib3/utils/base/logging.h
+++ b/icing/text_classifier/lib3/utils/base/logging.h
@@ -22,7 +22,6 @@
#include "icing/text_classifier/lib3/utils/base/logging_levels.h"
#include "icing/text_classifier/lib3/utils/base/port.h"
-
namespace libtextclassifier3 {
namespace logging {
diff --git a/icing/text_classifier/lib3/utils/base/statusor.h b/icing/text_classifier/lib3/utils/base/statusor.h
index f5fae7a..aa1e598 100644
--- a/icing/text_classifier/lib3/utils/base/statusor.h
+++ b/icing/text_classifier/lib3/utils/base/statusor.h
@@ -86,6 +86,8 @@ class StatusOr {
// Conversion assignment operator, T must be assignable from U
template <typename U>
inline StatusOr& operator=(const StatusOr<U>& other);
+ template <typename U>
+ inline StatusOr& operator=(StatusOr<U>&& other);
inline ~StatusOr();
@@ -134,6 +136,40 @@ class StatusOr {
friend class StatusOr;
private:
+ void Clear() {
+ if (ok()) {
+ value_.~T();
+ }
+ }
+
+ // Construct the value through placement new with the passed argument.
+ template <typename... Arg>
+ void MakeValue(Arg&&... arg) {
+ new (&value_) T(std::forward<Arg>(arg)...);
+ }
+
+ // Creates a valid instance of type T constructed with U and assigns it to
+ // value_. Handles how to properly assign to value_ if value_ was never
+ // actually initialized (if this is currently non-OK).
+ template <typename U>
+ void AssignValue(U&& value) {
+ if (ok()) {
+ value_ = std::forward<U>(value);
+ } else {
+ MakeValue(std::forward<U>(value));
+ status_ = Status::OK;
+ }
+ }
+
+ // Creates a status constructed with U and assigns it to status_. It also
+ // properly destroys value_ if this is OK and value_ represents a valid
+ // instance of T.
+ template <typename U>
+ void AssignStatus(U&& v) {
+ Clear();
+ status_ = static_cast<Status>(std::forward<U>(v));
+ }
+
Status status_;
// The members of unions do not require initialization and are not destructed
// unless specifically called. This allows us to construct instances of
@@ -165,12 +201,19 @@ template <typename T>
inline StatusOr<T>::StatusOr(T&& value) : value_(std::move(value)) {}
template <typename T>
-inline StatusOr<T>::StatusOr(const StatusOr& other)
- : status_(other.status_), value_(other.value_) {}
+inline StatusOr<T>::StatusOr(const StatusOr& other) : status_(other.status_) {
+ if (other.ok()) {
+ MakeValue(other.value_);
+ }
+}
template <typename T>
inline StatusOr<T>::StatusOr(StatusOr&& other)
- : status_(other.status_), value_(std::move(other.value_)) {}
+ : status_(std::move(other.status_)) {
+ if (other.ok()) {
+ MakeValue(std::move(other.value_));
+ }
+}
template <typename T>
template <
@@ -180,7 +223,11 @@ template <
std::is_convertible<const U&, T>>::value,
int>>
inline StatusOr<T>::StatusOr(const StatusOr<U>& other)
- : status_(other.status_), value_(other.value_) {}
+ : status_(other.status_) {
+ if (other.ok()) {
+ MakeValue(other.value_);
+ }
+}
template <typename T>
template <typename U,
@@ -189,7 +236,11 @@ template <typename U,
std::is_convertible<U&&, T>>::value,
int>>
inline StatusOr<T>::StatusOr(StatusOr<U>&& other)
- : status_(other.status_), value_(std::move(other.value_)) {}
+ : status_(std::move(other.status_)) {
+ if (other.ok()) {
+ MakeValue(std::move(other.value_));
+ }
+}
template <typename T>
template <
@@ -210,35 +261,47 @@ inline StatusOr<T>::StatusOr(U&& value) : StatusOr(T(std::forward<U>(value))) {}
template <typename T>
inline StatusOr<T>& StatusOr<T>::operator=(const StatusOr& other) {
- status_ = other.status_;
- if (status_.ok()) {
- value_ = other.value_;
+ if (other.ok()) {
+ AssignValue(other.value_);
+ } else {
+ AssignStatus(other.status_);
}
return *this;
}
template <typename T>
inline StatusOr<T>& StatusOr<T>::operator=(StatusOr&& other) {
- status_ = other.status_;
- if (status_.ok()) {
- value_ = std::move(other.value_);
+ if (other.ok()) {
+ AssignValue(std::move(other.value_));
+ } else {
+ AssignStatus(std::move(other.status_));
}
return *this;
}
template <typename T>
inline StatusOr<T>::~StatusOr() {
- if (ok()) {
- value_.~T();
- }
+ Clear();
}
template <typename T>
template <typename U>
inline StatusOr<T>& StatusOr<T>::operator=(const StatusOr<U>& other) {
- status_ = other.status_;
- if (status_.ok()) {
- value_ = other.value_;
+ if (other.ok()) {
+ AssignValue(other.value_);
+ } else {
+ AssignStatus(other.status_);
+ }
+ return *this;
+}
+
+template <typename T>
+template <typename U>
+inline StatusOr<T>& StatusOr<T>::operator=(StatusOr<U>&& other) {
+ if (other.ok()) {
+ AssignValue(std::move(other.value_));
+ } else {
+ AssignStatus(std::move(other.status_));
}
return *this;
}
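
The reason for the MakeValue/AssignValue split above: value_ lives in a union, so while this StatusOr holds an error no T has ever been constructed in that storage, and plain assignment would invoke T::operator= on raw memory. A standalone sketch of the same pattern (illustrative only, not the library's code):

#include <new>
#include <utility>

template <typename T>
struct Slot {
  bool engaged = false;
  union {
    T value;  // no T lives here until one is explicitly constructed
  };

  Slot() {}  // the union member is intentionally left unconstructed
  ~Slot() {
    if (engaged) value.~T();  // destroy only if a T was ever constructed
  }

  template <typename U>
  void Assign(U&& v) {
    if (engaged) {
      value = std::forward<U>(v);  // safe: a live T already exists here
    } else {
      new (&value) T(std::forward<U>(v));  // placement new on raw storage
      engaged = true;
    }
  }
};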
diff --git a/icing/text_classifier/lib3/utils/java/jni-base.cc b/icing/text_classifier/lib3/utils/java/jni-base.cc
index 897628c..e97e8b9 100644
--- a/icing/text_classifier/lib3/utils/java/jni-base.cc
+++ b/icing/text_classifier/lib3/utils/java/jni-base.cc
@@ -22,11 +22,13 @@ bool EnsureLocalCapacity(JNIEnv* env, int capacity) {
return env->EnsureLocalCapacity(capacity) == JNI_OK;
}
-bool JniExceptionCheckAndClear(JNIEnv* env) {
+bool JniExceptionCheckAndClear(JNIEnv* env, bool print_exception_on_error) {
TC3_CHECK(env != nullptr);
const bool result = env->ExceptionCheck();
if (result) {
- env->ExceptionDescribe();
+ if (print_exception_on_error) {
+ env->ExceptionDescribe();
+ }
env->ExceptionClear();
}
return result;
diff --git a/icing/text_classifier/lib3/utils/java/jni-base.h b/icing/text_classifier/lib3/utils/java/jni-base.h
index 5876eba..f86434b 100644
--- a/icing/text_classifier/lib3/utils/java/jni-base.h
+++ b/icing/text_classifier/lib3/utils/java/jni-base.h
@@ -17,6 +17,7 @@
#include <jni.h>
+#include <memory>
#include <string>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -63,7 +64,8 @@ namespace libtextclassifier3 {
bool EnsureLocalCapacity(JNIEnv* env, int capacity);
// Returns true if there was an exception. Also it clears the exception.
-bool JniExceptionCheckAndClear(JNIEnv* env);
+bool JniExceptionCheckAndClear(JNIEnv* env,
+ bool print_exception_on_error = true);
// A deleter to be used with std::unique_ptr to delete JNI global references.
class GlobalRefDeleter {
diff --git a/icing/text_classifier/lib3/utils/java/jni-helper.h b/icing/text_classifier/lib3/utils/java/jni-helper.h
index 907ad0d..4e548ec 100644
--- a/icing/text_classifier/lib3/utils/java/jni-helper.h
+++ b/icing/text_classifier/lib3/utils/java/jni-helper.h
@@ -150,8 +150,10 @@ class JniHelper {
jmethodID method_id, ...);
template <class T>
- static StatusOr<T> CallStaticIntMethod(JNIEnv* env, jclass clazz,
- jmethodID method_id, ...);
+ static StatusOr<T> CallStaticIntMethod(JNIEnv* env,
+ bool print_exception_on_error,
+ jclass clazz, jmethodID method_id,
+ ...);
};
template <typename T>
@@ -167,14 +169,19 @@ StatusOr<ScopedLocalRef<T>> JniHelper::GetObjectArrayElement(JNIEnv* env,
}
template <class T>
-StatusOr<T> JniHelper::CallStaticIntMethod(JNIEnv* env, jclass clazz,
- jmethodID method_id, ...) {
+StatusOr<T> JniHelper::CallStaticIntMethod(JNIEnv* env,
+ bool print_exception_on_error,
+ jclass clazz, jmethodID method_id,
+ ...) {
va_list args;
va_start(args, method_id);
jint result = env->CallStaticIntMethodV(clazz, method_id, args);
va_end(args);
- TC3_NO_EXCEPTION_OR_RETURN;
+ if (JniExceptionCheckAndClear(env, print_exception_on_error)) {
+ return {Status::UNKNOWN};
+ }
+
return result;
}
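
A sketch of the new parameter at a call site (hypothetical snippet; env, clazz, and method_id are assumed to be already resolved elsewhere):

#include <jni.h>
#include "icing/text_classifier/lib3/utils/java/jni-helper.h"

libtextclassifier3::Status CallQuietly(JNIEnv* env, jclass clazz,
                                       jmethodID method_id) {
  // Suppress ExceptionDescribe() logging for an expected, recoverable
  // failure; the pending Java exception is still cleared either way.
  libtextclassifier3::StatusOr<jint> result =
      libtextclassifier3::JniHelper::CallStaticIntMethod<jint>(
          env, /*print_exception_on_error=*/false, clazz, method_id);
  if (!result.ok()) {
    return result.status();  // recover without spamming the log
  }
  return libtextclassifier3::Status::OK;
}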
diff --git a/icing/tokenization/combined-tokenizer_test.cc b/icing/tokenization/combined-tokenizer_test.cc
new file mode 100644
index 0000000..0e400e2
--- /dev/null
+++ b/icing/tokenization/combined-tokenizer_test.cc
@@ -0,0 +1,262 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string_view>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+
+// This test exists to ensure that the different tokenizers treat different
+// segments of text in the same manner.
+class CombinedTokenizerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ jni_cache_ = GetTestJniCache();
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+ }
+
+ std::unique_ptr<const JniCache> jni_cache_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+};
+
+std::vector<std::string> GetTokenTerms(const std::vector<Token>& tokens) {
+ std::vector<std::string> terms;
+ terms.reserve(tokens.size());
+ for (const Token& token : tokens) {
+ if (token.type == Token::Type::REGULAR) {
+ terms.push_back(std::string(token.text));
+ }
+ }
+ return terms;
+}
+
+} // namespace
+
+TEST_F(CombinedTokenizerTest, SpecialCharacters) {
+ const std::string_view kText = "😊 Hello! Goodbye?";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> indexing_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> query_tokenizer,
+ CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
+ lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("😊", "Hello", "Goodbye"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("😊", "Hello", "Goodbye"));
+}
+
+TEST_F(CombinedTokenizerTest, Parentheses) {
+ const std::string_view kText = "((paren1)(paren2) (last paren))";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> indexing_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> query_tokenizer,
+ CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
+ lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("paren1", "paren2", "last", "paren"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("paren1", "paren2", "last", "paren"));
+}
+
+TEST_F(CombinedTokenizerTest, Negation) {
+ const std::string_view kText = "-foo -bar -baz";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> indexing_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> query_tokenizer,
+ CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
+ lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("foo", "bar", "baz"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("foo", "bar", "baz"));
+}
+
+// TODO(b/254874614): Handle colon word breaks in ICU 72+
+TEST_F(CombinedTokenizerTest, Colons) {
+ const std::string_view kText = ":foo: :bar baz:";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> indexing_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> query_tokenizer,
+ CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
+ lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("foo", "bar", "baz"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("foo", "bar", "baz"));
+}
+
+// TODO(b/254874614): Handle colon word breaks in ICU 72+
+TEST_F(CombinedTokenizerTest, ColonsPropertyRestricts) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> indexing_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> query_tokenizer,
+ CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
+ lang_segmenter_.get()));
+
+ if (GetIcuTokenizationVersion() >= 72) {
+ // In ICU 72 and above, ':' is no longer considered a word connector. The
+ // query tokenizer should still treat the first ':' as a property restrict.
+ constexpr std::string_view kText = "foo:bar";
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("foo", "bar"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("bar"));
+
+ // This difference, however, should only apply to the first ':'. Both should
+ // consider a second ':' to be a word break.
+ constexpr std::string_view kText2 = "foo:bar:baz";
+ ICING_ASSERT_OK_AND_ASSIGN(indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText2));
+ indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("foo", "bar", "baz"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(query_tokens,
+ query_tokenizer->TokenizeAll(kText2));
+ query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("bar", "baz"));
+ } else {
+ // This is a difference between the two tokenizers. "foo:bar" is a single
+ // token to the plain tokenizer because ':' is a word connector. But
+ // "foo:bar" is a property restrict to the query tokenizer - so "foo" is the
+ // property and "bar" is the only text term.
+ constexpr std::string_view kText = "foo:bar";
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("foo:bar"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("bar"));
+
+ // This difference, however, should only apply to the first ':'. A
+ // second ':' should be treated by both tokenizers as a word connector.
+ constexpr std::string_view kText2 = "foo:bar:baz";
+ ICING_ASSERT_OK_AND_ASSIGN(indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText2));
+ indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("foo:bar:baz"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(query_tokens,
+ query_tokenizer->TokenizeAll(kText2));
+ query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("bar:baz"));
+ }
+}
+
+TEST_F(CombinedTokenizerTest, Punctuation) {
+ const std::string_view kText = "Who? What!? Why & How.";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> indexing_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> query_tokenizer,
+ CreateQueryTokenizer(tokenizer_factory::QueryTokenizerType::RAW_QUERY,
+ lang_segmenter_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> indexing_tokens,
+ indexing_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> indexing_terms = GetTokenTerms(indexing_tokens);
+ EXPECT_THAT(indexing_terms, ElementsAre("Who", "What", "Why", "How"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ query_tokenizer->TokenizeAll(kText));
+ std::vector<std::string> query_terms = GetTokenTerms(query_tokens);
+ EXPECT_THAT(query_terms, ElementsAre("Who", "What", "Why", "How"));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/icu/icu-language-segmenter-factory.cc b/icing/tokenization/icu/icu-language-segmenter-factory.cc
index 0ef1824..7b095b4 100644
--- a/icing/tokenization/icu/icu-language-segmenter-factory.cc
+++ b/icing/tokenization/icu/icu-language-segmenter-factory.cc
@@ -15,6 +15,7 @@
#include "icing/tokenization/icu/icu-language-segmenter.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/util/logging.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -31,7 +32,7 @@ constexpr std::string_view kLocaleAmericanEnglishComputer = "en_US_POSIX";
// A LanguageSegmenter on success
// INVALID_ARGUMENT if locale string is invalid
//
-// TODO(samzheng): Figure out if we want to verify locale strings and notify
+// TODO(b/156383798): Figure out if we want to verify locale strings and notify
// users. Right now illegal locale strings will be ignored by ICU. ICU
// components will be created with its default locale.
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
@@ -46,7 +47,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
<< " not supported. Converting to locale " << ULOC_US;
options.locale = ULOC_US;
}
- return std::make_unique<IcuLanguageSegmenter>(std::move(options.locale));
+ return IcuLanguageSegmenter::Create(std::move(options.locale));
}
} // namespace language_segmenter_factory
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc
index d43a78d..cac12f7 100644
--- a/icing/tokenization/icu/icu-language-segmenter.cc
+++ b/icing/tokenization/icu/icu-language-segmenter.cc
@@ -24,7 +24,9 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
#include "icing/util/status-macros.h"
#include "unicode/ubrk.h"
@@ -47,9 +49,11 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// INTERNAL_ERROR if unable to create
static libtextclassifier3::StatusOr<
std::unique_ptr<LanguageSegmenter::Iterator>>
- Create(std::string_view text, std::string_view locale) {
+ Create(const IcuLanguageSegmenter* creator, UBreakIterator* break_iterator,
+ std::string_view text, std::string_view locale) {
std::unique_ptr<IcuLanguageSegmenterIterator> iterator(
- new IcuLanguageSegmenterIterator(text, locale));
+ new IcuLanguageSegmenterIterator(creator, break_iterator, text,
+ locale));
if (iterator->Initialize()) {
return iterator;
}
@@ -57,8 +61,8 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
}
~IcuLanguageSegmenterIterator() {
- ubrk_close(break_iterator_);
- utext_close(&u_text_);
+ utext_close(u_text_);
+ creator_.ReturnBreakIterator(break_iterator_);
}
// Advances to the next term. Returns false if it has reached the end.
@@ -82,9 +86,6 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
return false;
}
- if (!IsValidSegment()) {
- return Advance();
- }
return true;
}
@@ -101,78 +102,175 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
return text_.substr(term_start_index_, term_length);
}
- libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter(
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTermStart()
+ override {
+ if (!offset_iterator_.MoveToUtf8(term_start_index_)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ return offset_iterator_;
+ }
+
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTermEndExclusive()
+ override {
+ if (!offset_iterator_.MoveToUtf8(term_end_index_exclusive_)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ return offset_iterator_;
+ }
+
+ libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfterUtf32(
int32_t offset) override {
- if (offset < 0 || offset >= text_.length()) {
+ if (offset < 0) {
+ // Very simple. The first term start after a negative offset is the first
+ // term. So just reset to start and Advance.
+ return ResetToStartUtf32();
+ }
+
+ // 1. Find the unicode character that contains the byte at offset.
+ if (!offset_iterator_.MoveToUtf32(offset)) {
+ // An error occurred. Mark as DONE
+ if (offset_iterator_.utf8_index() != text_.length()) {
+ // We returned false for some reason other than hitting the end. This is
+ // a real error. Just return.
+ MarkAsDone();
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ }
+ if (offset_iterator_.utf8_index() == text_.length()) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Illegal offset provided! Offset %d is not within bounds of string "
- "of length %zu",
- offset, text_.length()));
+ "Illegal offset provided! Offset utf-32:%d, utf-8:%d is not within "
+ "bounds of string of length %zu",
+ offset_iterator_.utf32_index(), offset_iterator_.utf8_index(),
+ text_.length()));
}
- term_start_index_ = ubrk_following(break_iterator_, offset);
- if (term_start_index_ == UBRK_DONE) {
+
+ // 2. We've got the unicode character containing the byte at offset. Now, we
+ // need to point to the segment that starts after this character.
+ int following_utf8_index =
+ ubrk_following(break_iterator_, offset_iterator_.utf8_index());
+ if (following_utf8_index == UBRK_DONE) {
MarkAsDone();
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"No segments begin after provided offset %d.", offset));
}
- term_end_index_exclusive_ = ubrk_next(break_iterator_);
- if (term_end_index_exclusive_ == UBRK_DONE) {
- MarkAsDone();
+ term_end_index_exclusive_ = following_utf8_index;
+
+ // 3. The term_end_exclusive_ points to the start of the term that we want
+ // to return. We need to Advance so that term_start_ will now point to this
+ // term.
+ if (!Advance()) {
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"No segments begin after provided offset %d.", offset));
}
- if (!IsValidSegment()) {
- if (!Advance()) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "No segments begin after provided offset %d.", offset));
- }
+ if (!offset_iterator_.MoveToUtf8(term_start_index_)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
}
- return term_start_index_;
+ return offset_iterator_.utf32_index();
}
- libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore(
+ libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBeforeUtf32(
int32_t offset) override {
- if (offset < 0 || offset >= text_.length()) {
+ if (offset < 0) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Illegal offset provided! Offset %d is not within bounds of string "
"of length %zu",
offset, text_.length()));
}
- ICING_RETURN_IF_ERROR(ResetToTermStartingBefore(offset));
- if (term_end_index_exclusive_ > offset) {
- // This term ends after offset. So we need to get the term just before
- // this one.
- ICING_RETURN_IF_ERROR(ResetToTermStartingBefore(term_start_index_));
+
+ if (!offset_iterator_.MoveToUtf32(offset)) {
+ // An error occurred. Mark as DONE
+ if (offset_iterator_.utf8_index() != text_.length()) {
+ // We returned false for some reason other than hitting the end. This is
+ // a real error. Just return.
+ MarkAsDone();
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ // If it returned false because we hit the end, that's fine. We'll just
+ // treat it as if the request was for the end.
}
- return term_start_index_;
+
+ // 2. We've got the unicode character containing the byte at offset. Now, we
+ // need to point to the segment that ends before this character.
+ int starting_utf8_index =
+ ubrk_preceding(break_iterator_, offset_iterator_.utf8_index());
+ if (starting_utf8_index == UBRK_DONE) {
+ // Rewind the end indices.
+ MarkAsDone();
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "No segments end before provided offset %d.", offset));
+ }
+ term_start_index_ = starting_utf8_index;
+
+ // 3. We've correctly set the start index and the iterator currently points
+ // to that position. Now we need to find the correct end position and
+ // advance the iterator to that position.
+ int ending_utf8_index = ubrk_next(break_iterator_);
+ if (ending_utf8_index == UBRK_DONE) {
+ // This shouldn't ever happen.
+ MarkAsDone();
+ return absl_ports::AbortedError(IcingStringUtil::StringPrintf(
+ "No segments end before provided offset %d.", offset));
+ }
+ term_end_index_exclusive_ = ending_utf8_index;
+
+ // 4. The start and end indices point to a segment, but we need to ensure
+ // that this segment is 1) valid and 2) ends before offset. Otherwise, we'll
+ // need a segment prior to this one.
+ CharacterIterator term_start_iterator = offset_iterator_;
+ if (!term_start_iterator.MoveToUtf8(term_start_index_)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ if (term_end_index_exclusive_ > offset_iterator_.utf8_index()) {
+ return ResetToTermEndingBeforeUtf32(term_start_iterator.utf32_index());
+ }
+ return term_start_iterator.utf32_index();
}
- libtextclassifier3::StatusOr<int32_t> ResetToStart() override {
+ libtextclassifier3::StatusOr<int32_t> ResetToStartUtf32() override {
term_start_index_ = 0;
term_end_index_exclusive_ = 0;
if (!Advance()) {
- return absl_ports::NotFoundError("");
+ return absl_ports::NotFoundError(
+ "Unable to find any valid terms in text.");
+ }
+ if (!offset_iterator_.MoveToUtf8(term_start_index_)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
}
- return term_start_index_;
+ return offset_iterator_.utf32_index();
}
private:
- explicit IcuLanguageSegmenterIterator(std::string_view text,
+ explicit IcuLanguageSegmenterIterator(const IcuLanguageSegmenter* creator,
+ UBreakIterator* break_iterator,
+ std::string_view text,
std::string_view locale)
- : break_iterator_(nullptr),
+ : creator_(*creator),
+ break_iterator_(break_iterator),
text_(text),
locale_(locale),
- u_text_(UTEXT_INITIALIZER),
+ u_text_(nullptr),
+ offset_iterator_(text),
term_start_index_(0),
term_end_index_exclusive_(0) {}
// Returns true on success
bool Initialize() {
+ if (break_iterator_ == nullptr) {
+ return false;
+ }
UErrorCode status = U_ZERO_ERROR;
- utext_openUTF8(&u_text_, text_.data(), /*length=*/-1, &status);
- break_iterator_ = ubrk_open(UBRK_WORD, locale_.data(), /*text=*/nullptr,
- /*textLength=*/0, &status);
- ubrk_setUText(break_iterator_, &u_text_, &status);
+ u_text_ = utext_openUTF8(nullptr, text_.data(), text_.length(), &status);
+ if (u_text_ == nullptr) {
+ return false;
+ }
+ ubrk_setUText(break_iterator_, u_text_, &status);
return !U_FAILURE(status);
}
@@ -199,26 +297,11 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
term_start_index_ = 0;
}
- bool IsValidSegment() const {
- // Rule 1: all ASCII terms will be returned.
- // We know it's a ASCII term by checking the first char.
- if (i18n_utils::IsAscii(text_[term_start_index_])) {
- return true;
- }
-
- UChar32 uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(),
- term_start_index_);
- // Rule 2: for non-ASCII terms, only the alphabetic terms are returned.
- // We know it's an alphabetic term by checking the first unicode character.
- if (u_isUAlphabetic(uchar32)) {
- return true;
- }
- return false;
- }
+ const IcuLanguageSegmenter& creator_; // Does not own.
// The underlying class that does the segmentation, ubrk_close() must be
// called after using.
- UBreakIterator* break_iterator_;
+ UBreakIterator* break_iterator_; // Does not own.
// Text to be segmented
std::string_view text_;
@@ -229,8 +312,17 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
std::string_view locale_;
// A thin wrapper around the input UTF8 text, needed by break_iterator_.
- // utext_close() must be called after using.
- UText u_text_;
+ // Allocated by calling utext_openUTF8() and freed by calling utext_close().
+ UText* u_text_;
+
+ // Offset iterator. This iterator is not guaranteed to point to any particular
+ // character, but is guaranteed to point to a valid UTF character sequence.
+ //
+ // This iterator is used to save some amount of linear traversal when seeking
+ // to a specific UTF-32 offset; without it, each function would have to create
+ // a CharacterIterator at the beginning of the text and traverse forward from
+ // there on every call.
+ CharacterIterator offset_iterator_;
// The start and end indices are used to track the positions of current
// term.
@@ -238,18 +330,61 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
int term_end_index_exclusive_;
};
-IcuLanguageSegmenter::IcuLanguageSegmenter(std::string locale)
- : locale_(std::move(locale)) {}
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IcuLanguageSegmenter>>
+IcuLanguageSegmenter::Create(std::string&& locale) {
+ UErrorCode status = U_ZERO_ERROR;
+ UBreakIterator* break_iterator = ubrk_open(
+ UBRK_WORD, locale.c_str(), /*text=*/nullptr, /*textLength=*/0, &status);
+ if (U_FAILURE(status) || break_iterator == nullptr) {
+ return absl_ports::AbortedError(
+ "Unable to create ICU break_iterator for language segmentation");
+ }
+ return std::unique_ptr<IcuLanguageSegmenter>(
+ new IcuLanguageSegmenter(std::move(locale), break_iterator));
+}
+
+UBreakIterator* IcuLanguageSegmenter::ProduceBreakIterator() const {
+ UBreakIterator* itr = nullptr;
+ {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_break_iterator_ != nullptr) {
+ itr = cached_break_iterator_;
+ cached_break_iterator_ = nullptr;
+ }
+ }
+ if (itr == nullptr) {
+ UErrorCode status = U_ZERO_ERROR;
+ itr = ubrk_open(UBRK_WORD, locale_.c_str(), /*text=*/nullptr,
+ /*textLength=*/0, &status);
+ if (U_FAILURE(status)) {
+ itr = nullptr;
+ }
+ }
+ return itr;
+}
+
+void IcuLanguageSegmenter::ReturnBreakIterator(UBreakIterator* itr) const {
+ {
+ absl_ports::unique_lock l(&mutex_);
+ if (cached_break_iterator_ == nullptr) {
+ cached_break_iterator_ = itr;
+ return;
+ }
+ }
+ ubrk_close(itr);
+}
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
IcuLanguageSegmenter::Segment(const std::string_view text) const {
- return IcuLanguageSegmenterIterator::Create(text, locale_);
+ return IcuLanguageSegmenterIterator::Create(this, ProduceBreakIterator(),
+ text, locale_);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- Segment(text));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator,
+ Segment(text));
std::vector<std::string_view> terms;
while (iterator->Advance()) {
terms.push_back(iterator->GetTerm());
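
Why the UTF-32/UTF-8 bookkeeping above is needed: the two indexings diverge as soon as the text contains a multi-byte character. A small worked sketch (assuming CharacterIterator's constructor and MoveToUtf32/utf8_index behave as they are used in this file):

#include "icing/util/character-iterator.h"

void Utf32VersusUtf8Sketch() {
  // "añb": 'a' is one UTF-8 byte, 'ñ' is two, 'b' is one - so the text has
  // three code points but four bytes.
  //   UTF-32 indices: 0 ('a'), 1 ('ñ'), 2 ('b')
  //   UTF-8 indices:  0 ('a'), 1 ('ñ'), 3 ('b')
  icing::lib::CharacterIterator it("añb");
  if (it.MoveToUtf32(2)) {              // seek to the third code point, 'b'
    int byte_offset = it.utf8_index();  // 3, not 2, because 'ñ' is two bytes
  }
}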
diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h
index 4115461..44de5a2 100644
--- a/icing/tokenization/icu/icu-language-segmenter.h
+++ b/icing/tokenization/icu/icu-language-segmenter.h
@@ -22,7 +22,9 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/tokenization/language-segmenter.h"
+#include "unicode/ubrk.h"
namespace icing {
namespace lib {
@@ -41,7 +43,14 @@ namespace lib {
// class. Other special tokenization logic will be in each tokenizer.
class IcuLanguageSegmenter : public LanguageSegmenter {
public:
- explicit IcuLanguageSegmenter(std::string locale);
+ static libtextclassifier3::StatusOr<std::unique_ptr<IcuLanguageSegmenter>>
+ Create(std::string&& locale);
+
+ ~IcuLanguageSegmenter() override {
+ if (cached_break_iterator_ != nullptr) {
+ ubrk_close(cached_break_iterator_);
+ }
+ }
IcuLanguageSegmenter(const IcuLanguageSegmenter&) = delete;
IcuLanguageSegmenter& operator=(const IcuLanguageSegmenter&) = delete;
@@ -69,8 +78,32 @@ class IcuLanguageSegmenter : public LanguageSegmenter {
std::string_view text) const override;
private:
+ // Declared a friend so that it can call ReturnBreakIterator().
+ friend class IcuLanguageSegmenterIterator;
+
+ explicit IcuLanguageSegmenter(std::string&& locale, UBreakIterator* iterator)
+ : locale_(std::move(locale)), cached_break_iterator_(iterator) {}
+
+ // Returns a UBreakIterator that the caller owns.
+ // If cached_break_iterator_ is non-null, transfers ownership to caller and
+ // sets cached_break_iterator_ to null.
+ // If cached_break_iterator is null, creates a new UBreakIterator and
+ // transfers ownership to caller.
+ UBreakIterator* ProduceBreakIterator() const;
+
+ // Caller transfers ownership of itr to IcuLanguageSegmenter.
+ // If cached_break_iterator_ is null, itr becomes the cached_break_iterator_
+ // If cached_break_iterator_ is non-null, then itr will be closed.
+ void ReturnBreakIterator(UBreakIterator* itr) const;
+
// Used to help segment text
const std::string locale_;
+
+ // The underlying class that does the segmentation. ubrk_close() must be
+ // called on it when this segmenter is destroyed.
+ mutable UBreakIterator* cached_break_iterator_ ICING_GUARDED_BY(mutex_);
+
+ mutable absl_ports::shared_mutex mutex_;
};
} // namespace lib
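
The Produce/Return pair above is a single-slot cache: at most one idle UBreakIterator is retained, a concurrent Segment() call that misses the cache simply opens a fresh iterator, and a returned iterator that finds the slot occupied is closed instead of stacked. A minimal sketch of the same pattern (illustrative; Resource and the open/close helpers are stand-ins, and std::mutex stands in for absl_ports::shared_mutex):

#include <mutex>

struct Resource {};  // stand-in for UBreakIterator
Resource* OpenNewResource() { return new Resource(); }   // stand-in: ubrk_open
void CloseResource(Resource* r) { delete r; }            // stand-in: ubrk_close

class SingleSlotCache {
 public:
  Resource* Produce() {
    {
      std::lock_guard<std::mutex> l(mu_);
      if (cached_ != nullptr) {
        Resource* r = cached_;
        cached_ = nullptr;  // hand the idle instance to the caller
        return r;
      }
    }
    return OpenNewResource();  // cache miss: allocate outside the lock
  }

  void Return(Resource* r) {
    {
      std::lock_guard<std::mutex> l(mu_);
      if (cached_ == nullptr) {
        cached_ = r;  // keep exactly one idle instance around
        return;
      }
    }
    CloseResource(r);  // slot already occupied: drop the extra
  }

 private:
  std::mutex mu_;
  Resource* cached_ = nullptr;
};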
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index 31c2726..a7f7419 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -12,24 +12,40 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <memory>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/icu-i18n-test-utils.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/character-iterator.h"
#include "unicode/uloc.h"
namespace icing {
namespace lib {
-namespace {
+
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+namespace {
+
+language_segmenter_factory::SegmenterOptions GetSegmenterOptions(
+ const std::string& locale, const JniCache* jni_cache) {
+ return language_segmenter_factory::SegmenterOptions(locale, jni_cache);
+}
+
// Returns a vector containing all terms retrieved by Advancing on the iterator.
std::vector<std::string_view> GetAllTermsAdvance(
LanguageSegmenter::Iterator* itr) {
@@ -40,70 +56,61 @@ std::vector<std::string_view> GetAllTermsAdvance(
return terms;
}
-// Returns a vector containing all terms retrieved by calling
-// ResetToStart/ResetAfter with the current position to simulate Advancing on
-// the iterator.
-std::vector<std::string_view> GetAllTermsResetAfter(
+// Returns a vector containing all terms retrieved by calling ResetAfter with
+// the UTF-32 position of the current term start to simulate Advancing on the
+// iterator.
+std::vector<std::string_view> GetAllTermsResetAfterUtf32(
LanguageSegmenter::Iterator* itr) {
std::vector<std::string_view> terms;
- if (!itr->ResetToStart().ok()) {
- return terms;
- }
- terms.push_back(itr->GetTerm());
- const char* text_begin = itr->GetTerm().data();
- // Calling ResetToTermStartingAfter with the current position should get the
- // very next term in the sequence.
- for (int current_pos = 0; itr->ResetToTermStartingAfter(current_pos).ok();
- current_pos = itr->GetTerm().data() - text_begin) {
+ // Calling ResetToTermStartingAfterUtf32 with -1 should get the first term in
+ // the sequence.
+ bool is_ok = itr->ResetToTermStartingAfterUtf32(-1).ok();
+ while (is_ok) {
terms.push_back(itr->GetTerm());
+ // Calling ResetToTermStartingAfterUtf32 with the current position should
+ // get the very next term in the sequence.
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie();
+ is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok();
}
return terms;
}
// Returns a vector containing all terms retrieved by alternating calls to
-// Advance and calls to ResetAfter with the current position to simulate
-// Advancing.
-std::vector<std::string_view> GetAllTermsAdvanceAndResetAfter(
+// Advance and calls to ResetAfter with the UTF-32 position of the current term
+// start to simulate Advancing.
+std::vector<std::string_view> GetAllTermsAdvanceAndResetAfterUtf32(
LanguageSegmenter::Iterator* itr) {
- const char* text_begin = itr->GetTerm().data();
std::vector<std::string_view> terms;
-
- bool is_ok = true;
- int current_pos = 0;
+ bool is_ok = itr->Advance();
while (is_ok) {
+ terms.push_back(itr->GetTerm());
// Alternate between using Advance and ResetToTermAfter.
if (terms.size() % 2 == 0) {
is_ok = itr->Advance();
} else {
- // Calling ResetToTermStartingAfter with the current position should get
- // the very next term in the sequence.
- current_pos = itr->GetTerm().data() - text_begin;
- is_ok = itr->ResetToTermStartingAfter(current_pos).ok();
- }
- if (is_ok) {
- terms.push_back(itr->GetTerm());
+ // Calling ResetToTermStartingAfterUtf32 with the current position should
+ // get the very next term in the sequence.
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie();
+ is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok();
}
}
return terms;
}
// Returns a vector containing all terms retrieved by calling ResetBefore with
-// the current position, starting at the end of the text. This vector should be
-// in reverse order of GetAllTerms and missing the last term.
-std::vector<std::string_view> GetAllTermsResetBefore(
+// the UTF-32 position of the current term start, starting at the end of the
+// text. This vector should be in reverse order of GetAllTerms and missing the
+// last term.
+std::vector<std::string_view> GetAllTermsResetBeforeUtf32(
LanguageSegmenter::Iterator* itr) {
- const char* text_begin = itr->GetTerm().data();
- int last_pos = 0;
- while (itr->Advance()) {
- last_pos = itr->GetTerm().data() - text_begin;
- }
std::vector<std::string_view> terms;
- // Calling ResetToTermEndingBefore with the current position should get the
- // previous term in the sequence.
- for (int current_pos = last_pos;
- itr->ResetToTermEndingBefore(current_pos).ok();
- current_pos = itr->GetTerm().data() - text_begin) {
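+  // 1000 is an arbitrary offset safely past the end of every test string, so
+  // the first reset positions the iterator at the last term in the text.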
+ bool is_ok = itr->ResetToTermEndingBeforeUtf32(1000).ok();
+ while (is_ok) {
terms.push_back(itr->GetTerm());
+ // Calling ResetToTermEndingBeforeUtf32 with the current position should get
+ // the previous term in the sequence.
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie();
+ is_ok = itr->ResetToTermEndingBeforeUtf32(char_itr.utf32_index()).ok();
}
return terms;
}
@@ -112,6 +119,9 @@ class IcuLanguageSegmenterAllLocalesTest
: public testing::TestWithParam<const char*> {
protected:
void SetUp() override {
+ if (!IsIcuTokenization()) {
+ GTEST_SKIP() << "ICU tokenization not enabled!";
+ }
ICING_ASSERT_OK(
// File generated via icu_data_file rule in //icing/BUILD.
icu_data_file_helper::SetUpICUDataFile(
@@ -119,27 +129,34 @@ class IcuLanguageSegmenterAllLocalesTest
}
static std::string GetLocale() { return GetParam(); }
- static language_segmenter_factory::SegmenterOptions GetOptions() {
- return language_segmenter_factory::SegmenterOptions(GetLocale());
- }
+
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
};
+} // namespace
+
TEST_P(IcuLanguageSegmenterAllLocalesTest, EmptyText) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms(""), IsOkAndHolds(IsEmpty()));
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, SimpleText) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms("Hello World"),
IsOkAndHolds(ElementsAre("Hello", " ", "World")));
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_Punctuation) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// ASCII punctuation marks are kept
EXPECT_THAT(
language_segmenter->GetAllTerms("Hello, World!!!"),
@@ -153,8 +170,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_Punctuation) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_SpecialCharacter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// ASCII special characters are kept
EXPECT_THAT(language_segmenter->GetAllTerms("Pay $1000"),
IsOkAndHolds(ElementsAre("Pay", " ", "$", "1000")));
@@ -169,19 +188,23 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ASCII_SpecialCharacter) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, Non_ASCII_Non_Alphabetic) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Full-width (non-ASCII) punctuation marks and special characters are kept
// as individual terms.
EXPECT_THAT(language_segmenter->GetAllTerms("。?·Hello!×"),
- IsOkAndHolds(ElementsAre("Hello")));
+ IsOkAndHolds(ElementsAre("。", "?", "·", "Hello", "!", "×")));
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, Acronym) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
- EXPECT_THAT(language_segmenter->GetAllTerms("U.S. Bank"),
- IsOkAndHolds(ElementsAre("U.S", ".", " ", "Bank")));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ EXPECT_THAT(language_segmenter->GetAllTerms("U.S.𡔖 Bank"),
+ IsOkAndHolds(ElementsAre("U.S", ".", "𡔖", " ", "Bank")));
EXPECT_THAT(language_segmenter->GetAllTerms("I.B.M."),
IsOkAndHolds(ElementsAre("I.B.M", ".")));
EXPECT_THAT(language_segmenter->GetAllTerms("I,B,M"),
@@ -191,8 +214,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Acronym) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// According to unicode word break rules
// WB6(https://unicode.org/reports/tr29/#WB6),
// WB7(https://unicode.org/reports/tr29/#WB7), and a few others, some
@@ -202,16 +227,42 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) {
// Word connectors
EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android"),
IsOkAndHolds(ElementsAre("com.google.android")));
- EXPECT_THAT(language_segmenter->GetAllTerms("com:google:android"),
- IsOkAndHolds(ElementsAre("com:google:android")));
EXPECT_THAT(language_segmenter->GetAllTerms("com'google'android"),
IsOkAndHolds(ElementsAre("com'google'android")));
EXPECT_THAT(language_segmenter->GetAllTerms("com_google_android"),
IsOkAndHolds(ElementsAre("com_google_android")));
// Word connectors can be mixed
- EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android:icing"),
- IsOkAndHolds(ElementsAre("com.google.android:icing")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android_icing"),
+ IsOkAndHolds(ElementsAre("com.google.android_icing")));
+
+  // Connectors that don't have valid terms on both sides of them are not
+ // considered connectors.
+ EXPECT_THAT(language_segmenter->GetAllTerms("'bar'baz"),
+ IsOkAndHolds(ElementsAre("'", "bar'baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar.baz."),
+ IsOkAndHolds(ElementsAre("bar.baz", ".")));
+
+  // Connectors that don't have valid terms on both sides of them are not
+ // considered connectors.
+ EXPECT_THAT(language_segmenter->GetAllTerms(" .bar.baz"),
+ IsOkAndHolds(ElementsAre(" ", ".", "bar.baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar'baz' "),
+ IsOkAndHolds(ElementsAre("bar'baz", "'", " ")));
+
+  // Connectors don't connect if one side is not a valid term.
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar.baz.?"),
+ IsOkAndHolds(ElementsAre("bar.baz", ".", "?")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("?'bar'baz"),
+ IsOkAndHolds(ElementsAre("?", "'", "bar'baz")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("私'は"),
+ IsOkAndHolds(ElementsAre("私", "'", "は")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("我.每"),
+ IsOkAndHolds(ElementsAre("我", ".", "每")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("เดิน'ไป"),
+ IsOkAndHolds(ElementsAre("เดิน'ไป")));
// Any leading and trailing characters are not connectors
EXPECT_THAT(language_segmenter->GetAllTerms(".com.google.android."),
@@ -226,8 +277,6 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) {
IsOkAndHolds(ElementsAre("com", "+", "google", "+", "android")));
EXPECT_THAT(language_segmenter->GetAllTerms("com*google*android"),
IsOkAndHolds(ElementsAre("com", "*", "google", "*", "android")));
- EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
- IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android")));
EXPECT_THAT(language_segmenter->GetAllTerms("com^google^android"),
IsOkAndHolds(ElementsAre("com", "^", "google", "^", "android")));
EXPECT_THAT(language_segmenter->GetAllTerms("com&google&android"),
@@ -241,11 +290,43 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) {
EXPECT_THAT(
language_segmenter->GetAllTerms("com\"google\"android"),
IsOkAndHolds(ElementsAre("com", "\"", "google", "\"", "android")));
+
+ // In ICU 72, there were a few changes:
+ // 1. ':' stopped being a word connector
+ // 2. '@' became a word connector
+  // 3. <numeric><word-connector><numeric> such as "3'14" is now considered
+ // a single token.
+ if (GetIcuTokenizationVersion() >= 72) {
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("com:google:android"),
+ IsOkAndHolds(ElementsAre("com", ":", "google", ":", "android")));
+ // In ICU 74, the rules for '@' were reverted.
+ if (GetIcuTokenizationVersion() >= 74) {
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android")));
+ } else {
+ EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com@google@android")));
+ }
+ EXPECT_THAT(language_segmenter->GetAllTerms("3'14"),
+ IsOkAndHolds(ElementsAre("3'14")));
+ } else {
+ EXPECT_THAT(language_segmenter->GetAllTerms("com:google:android"),
+ IsOkAndHolds(ElementsAre("com:google:android")));
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("3'14"),
+ IsOkAndHolds(ElementsAre("3", "'", "14")));
+ }
}
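
// Illustrative sketch only, not part of this change: GetIcuTokenizationVersion
// above is the project's own helper, but an ICU major version can also be read
// through the public ICU C API u_getVersion() declared in unicode/uversion.h.
inline int GetIcuMajorVersionSketch() {
  UVersionInfo version_info;   // uint8_t[4]: {major, minor, milli, micro}.
  u_getVersion(version_info);  // Filled with the linked ICU library's version.
  return version_info[0];      // e.g. 72 for ICU 72.x, 74 for ICU 74.x.
}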
TEST_P(IcuLanguageSegmenterAllLocalesTest, Apostrophes) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms("It's ok."),
IsOkAndHolds(ElementsAre("It's", " ", "ok", ".")));
EXPECT_THAT(language_segmenter->GetAllTerms("He'll be back."),
@@ -265,8 +346,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Apostrophes) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, Parentheses) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms("(Hello)"),
IsOkAndHolds(ElementsAre("(", "Hello", ")")));
@@ -276,8 +359,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Parentheses) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, Quotes) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms("\"Hello\""),
IsOkAndHolds(ElementsAre("\"", "Hello", "\"")));
@@ -287,8 +372,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Quotes) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, Alphanumeric) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Alphanumeric terms are allowed
EXPECT_THAT(language_segmenter->GetAllTerms("Se7en A4 3a"),
@@ -296,8 +383,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Alphanumeric) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, Number) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Numbers are segmented as individual terms
EXPECT_THAT(
@@ -311,9 +400,20 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, Number) {
IsOkAndHolds(ElementsAre("-", "123")));
}
+TEST_P(IcuLanguageSegmenterAllLocalesTest, FullWidthNumbers) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
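+  // Full-width digits (U+FF10 through U+FF19) are segmented as one numeric
+  // term, just like ASCII digits.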
+ EXPECT_THAT(language_segmenter->GetAllTerms("0123456789"),
+ IsOkAndHolds(ElementsAre("0123456789")));
+}
+
TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
const int kNumSeparators = 256;
std::string text_with_spaces =
@@ -337,21 +437,24 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, CJKT) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that don't
// use whitespace as a word delimiter.
// Chinese
- EXPECT_THAT(language_segmenter->GetAllTerms("我每天走路去上班。"),
- IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班")));
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("我每天走路去上班。"),
+ IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班", "。")));
// Japanese
EXPECT_THAT(language_segmenter->GetAllTerms("私は毎日仕事に歩いています。"),
IsOkAndHolds(ElementsAre("私", "は", "毎日", "仕事", "に", "歩",
- "い", "てい", "ます")));
+ "い", "てい", "ます", "。")));
// Khmer
EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
- IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ")));
+ IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ", "។")));
// Thai
EXPECT_THAT(
language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"),
@@ -359,16 +462,19 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, CJKT) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, LatinLettersWithAccents) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms("āăąḃḅḇčćç"),
IsOkAndHolds(ElementsAre("āăąḃḅḇčćç")));
}
-// TODO(samzheng): test cases for more languages (e.g. top 20 in the world)
TEST_P(IcuLanguageSegmenterAllLocalesTest, WhitespaceSplitLanguages) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Turkish
EXPECT_THAT(language_segmenter->GetAllTerms("merhaba dünya"),
IsOkAndHolds(ElementsAre("merhaba", " ", "dünya")));
@@ -378,10 +484,11 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WhitespaceSplitLanguages) {
IsOkAndHolds(ElementsAre("나는", " ", "매일", " ", "출근합니다", ".")));
}
-// TODO(samzheng): more mixed languages test cases
TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguages) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
EXPECT_THAT(language_segmenter->GetAllTerms("How are you你好吗お元気ですか"),
IsOkAndHolds(ElementsAre("How", " ", "are", " ", "you", "你好",
"吗", "お", "元気", "です", "か")));
@@ -392,8 +499,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguages) {
}
TEST_P(IcuLanguageSegmenterAllLocalesTest, NotCopyStrings) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Validates that the input strings are not copied
const std::string text = "Hello World";
const char* word1_address = text.c_str();
@@ -409,26 +518,141 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, NotCopyStrings) {
EXPECT_THAT(word2_address, Eq(word2_result_address));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterOutOfBounds) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartUtf32WordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "com.google.android is package";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "com.google.android is package"
+ // ^ ^^ ^^
+ // UTF-8 idx: 0 18 19 21 22
+ // UTF-32 idx: 0 18 19 21 22
+ auto position_or = itr->ResetToStartUtf32();
+ EXPECT_THAT(position_or, IsOk());
+ ASSERT_THAT(itr->GetTerm(), Eq("com.google.android"));
+}
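
// Illustrative sketch only, not part of this change: the UTF-8/UTF-32 index
// pairs annotated in these tests differ because a multi-byte UTF-8 code point
// still advances the UTF-32 index by exactly one. A hypothetical conversion
// helper, using only ICU's public U8_NEXT macro from unicode/utf8.h:
inline int Utf8OffsetToUtf32Index(std::string_view text, int32_t utf8_offset) {
  int32_t i = 0;        // Current byte position within the UTF-8 text.
  int utf32_index = 0;  // Number of code points consumed so far.
  while (i < utf8_offset && i < static_cast<int32_t>(text.length())) {
    UChar32 c;
    U8_NEXT(text.data(), i, static_cast<int32_t>(text.length()), c);
    ++utf32_index;
  }
  return utf32_index;
}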
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest,
+ IteratorOneAdvanceResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_TRUE(itr->Advance()); // itr points to 'How'
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest,
+ IteratorMultipleAdvancesResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
segmenter->Segment(kText));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- ASSERT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8)));
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_TRUE(itr->Advance());
+ ASSERT_TRUE(itr->Advance());
+ ASSERT_TRUE(itr->Advance());
+ ASSERT_TRUE(itr->Advance()); // itr points to ' '
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ while (itr->Advance()) {
+ // Do nothing.
+ }
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32WordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "package com.google.android name";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package com.google.android name"
+ // ^ ^^ ^^
+ // UTF-8 idx: 0 7 8 26 27
+ // UTF-32 idx: 0 7 8 26 27
+ auto position_or = itr->ResetToTermStartingAfterUtf32(8);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(26));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+
+ position_or = itr->ResetToTermStartingAfterUtf32(7);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(8));
+ ASSERT_THAT(itr->GetTerm(), Eq("com.google.android"));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32OutOfBounds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8)));
ASSERT_THAT(itr->GetTerm(), Eq("you"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(-1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(itr->GetTerm(), Eq("you"));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(-1), IsOk());
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(kText.length()),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(21),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(itr->GetTerm(), Eq("you"));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
}
// Tests that ResetToTermAfter and Advance produce the same output. With the
@@ -437,9 +661,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterOutOfBounds) {
// terms produced by ResetToTermAfter calls with the current position
// provided as the argument.
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- MixedLanguagesResetToTermAfterEquivalentToAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ MixedLanguagesResetToTermAfterUtf32EquivalentToAdvance) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
@@ -451,16 +676,17 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetAfter(reset_to_term_itr.get());
+ GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- ThaiResetToTermAfterEquivalentToAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ThaiResetToTermAfterUtf32EquivalentToAdvance) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
@@ -472,16 +698,17 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetAfter(reset_to_term_itr.get());
+ GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- KoreanResetToTermAfterEquivalentToAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ KoreanResetToTermAfterUtf32EquivalentToAdvance) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
@@ -493,7 +720,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetAfter(reset_to_term_itr.get());
+ GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
@@ -504,9 +731,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
// should be able to mix ResetToTermAfter(current_position) calls and Advance
// calls to mimic calling Advance.
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- MixedLanguagesResetToTermAfterInteroperableWithAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ MixedLanguagesResetToTermAfterUtf32InteroperableWithAdvance) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
@@ -518,7 +746,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
segmenter->Segment(kText));
std::vector<std::string_view> advance_and_reset_terms =
- GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get());
+ GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
EXPECT_THAT(advance_and_reset_terms,
testing::ElementsAreArray(advance_terms));
@@ -526,9 +754,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- ThaiResetToTermAfterInteroperableWithAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ThaiResetToTermAfterUtf32InteroperableWithAdvance) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
@@ -540,7 +769,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> advance_and_reset_terms =
- GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get());
+ GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
EXPECT_THAT(advance_and_reset_terms,
testing::ElementsAreArray(advance_terms));
@@ -548,9 +777,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- KoreanResetToTermAfterInteroperableWithAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ KoreanResetToTermAfterUtf32InteroperableWithAdvance) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
@@ -562,190 +792,243 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> advance_and_reset_terms =
- GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get());
+ GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
EXPECT_THAT(advance_and_reset_terms,
testing::ElementsAreArray(advance_terms));
EXPECT_THAT(advance_and_reset_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguagesResetToTermAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest,
+ MixedLanguagesResetToTermAfterUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment("How are you你好吗お元気ですか"));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(3)));
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(11)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(11)));
EXPECT_THAT(itr->GetTerm(), Eq("你好"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("you"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(32), IsOkAndHolds(Eq(35)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(18), IsOkAndHolds(Eq(19)));
EXPECT_THAT(itr->GetTerm(), Eq("か"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(17)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13)));
EXPECT_THAT(itr->GetTerm(), Eq("吗"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(35),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- ContinuousWhitespacesResetToTermAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ContinuousWhitespacesResetToTermAfterUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kTextWithSpace));
- // String: "Hello World"
- // ^ ^ ^
- // Bytes: 0 5 15
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(5)));
+ // String: "Hello World"
+ // ^ ^ ^
+ // UTF-8 idx: 0 5 15
+ // UTF-32 idx: 0 5 15
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(15)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(15)));
EXPECT_THAT(itr->GetTerm(), Eq("World"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(5), IsOkAndHolds(Eq(15)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(5), IsOkAndHolds(Eq(15)));
EXPECT_THAT(itr->GetTerm(), Eq("World"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(15),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(17),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(19),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't use whitespace as a word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kChinese));
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // Bytes: 0 3 9 15 18
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3)));
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^ ^
+ // UTF-8 idx: 0 3 9 15 18 24
+  // UTF-32 idx: 0  1   3   5 6  8
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1)));
EXPECT_THAT(itr->GetTerm(), Eq("每天"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(9)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq("走路"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(19),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8)));
+ EXPECT_THAT(itr->GetTerm(), Eq("。"));
+
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(8),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kJapanese));
- // String: "私は毎日仕事に歩いています。"
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 6 12 18212427 33
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3)));
+ // String: "私は毎日仕事に歩いています。"
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 6 12 18212427 33 39
+ // UTF-32 idx: 0 1 2 4 6 7 8 9 11 13
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1)));
EXPECT_THAT(itr->GetTerm(), Eq("は"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(33),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(13),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(12)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(3), IsOkAndHolds(Eq(4)));
EXPECT_THAT(itr->GetTerm(), Eq("仕事"));
+
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13)));
+ EXPECT_THAT(itr->GetTerm(), Eq("。"));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kKhmer));
- // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
- // ^ ^ ^ ^
- // Bytes: 0 9 24 45
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9)));
+ // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
+ // ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 9 24 45 69
+ // UTF-32 idx: 0 3 8 15 23
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq("ដើរទៅ"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(47),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), IsOkAndHolds(Eq(23)));
+ EXPECT_THAT(itr->GetTerm(), Eq("។"));
+
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(23),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(24)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ"));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfterUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kThai));
- // String: "ฉันเดินไปทำงานทุกวัน"
- // ^ ^ ^ ^ ^ ^
- // Bytes: 0 9 21 27 42 51
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9)));
+ // String: "ฉันเดินไปทำงานทุกวัน"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 9 21 27 42 51
+ // UTF-32 idx: 0 3 7 9 14 17
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq("เดิน"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(51),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(13), IsOkAndHolds(Eq(21)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(7)));
EXPECT_THAT(itr->GetTerm(), Eq("ไป"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(34), IsOkAndHolds(Eq(42)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(14)));
EXPECT_THAT(itr->GetTerm(), Eq("ทุก"));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBounds) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest,
+ ResetToTermBeforeWordConnectorUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "package name com.google.android!";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package name com.google.android!"
+ // ^ ^^ ^^ ^
+ // UTF-8 idx: 0 7 8 12 13 31
+ // UTF-32 idx: 0 7 8 12 13 31
+ auto position_or = itr->ResetToTermEndingBeforeUtf32(31);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(13));
+ ASSERT_THAT(itr->GetTerm(), Eq("com.google.android"));
+
+ position_or = itr->ResetToTermEndingBeforeUtf32(21);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(12));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBoundsUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
segmenter->Segment(kText));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- ASSERT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4)));
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4)));
ASSERT_THAT(itr->GetTerm(), Eq("are"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(-1),
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(-1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(itr->GetTerm(), Eq("are"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(kText.length()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(itr->GetTerm(), Eq("are"));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(29), IsOk());
+ EXPECT_THAT(itr->GetTerm(), Eq("か"));
}
// Tests that ResetToTermBefore and Advance produce the same output. With the
@@ -754,26 +1037,22 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBounds) {
// terms produced by ResetToTermBefore calls with the current position
// provided as the argument (after their order has been reversed).
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- MixedLanguagesResetToTermBeforeEquivalentToAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ MixedLanguagesResetToTermBeforeEquivalentToAdvanceUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
- // Can't produce the last term via calls to ResetToTermBefore. So skip
- // past that one.
- auto itr = advance_terms.begin();
- std::advance(itr, advance_terms.size() - 1);
- advance_terms.erase(itr);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetBefore(reset_to_term_itr.get());
+ GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
@@ -782,26 +1061,22 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- ThaiResetToTermBeforeEquivalentToAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ThaiResetToTermBeforeEquivalentToAdvanceUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
- // Can't produce the last term via calls to ResetToTermBefore. So skip
- // past that one.
- auto itr = advance_terms.begin();
- std::advance(itr, advance_terms.size() - 1);
- advance_terms.erase(itr);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetBefore(reset_to_term_itr.get());
+ GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
@@ -809,189 +1084,263 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- KoreanResetToTermBeforeEquivalentToAdvance) {
- ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ KoreanResetToTermBeforeEquivalentToAdvanceUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
- // Can't produce the last term via calls to ResetToTermBefore. So skip
- // past that one.
- auto itr = advance_terms.begin();
- std::advance(itr, advance_terms.size() - 1);
- advance_terms.erase(itr);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetBefore(reset_to_term_itr.get());
+ GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguagesResetToTermBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest,
+ MixedLanguagesResetToTermBeforeUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment("How are you你好吗お元気ですか"));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- EXPECT_THAT(itr->ResetToTermEndingBefore(2),
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(7)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(7)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4)));
EXPECT_THAT(itr->GetTerm(), Eq("are"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(32), IsOkAndHolds(Eq(23)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(18), IsOkAndHolds(Eq(15)));
EXPECT_THAT(itr->GetTerm(), Eq("元気"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(8)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(12), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("you"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(35), IsOkAndHolds(Eq(29)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(17)));
EXPECT_THAT(itr->GetTerm(), Eq("です"));
}
TEST_P(IcuLanguageSegmenterAllLocalesTest,
- ContinuousWhitespacesResetToTermBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+ ContinuousWhitespacesResetToTermBeforeUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kTextWithSpace));
- // String: "Hello World"
- // ^ ^ ^
- // Bytes: 0 5 15
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "Hello World"
+ // ^ ^ ^
+ // UTF-8 idx: 0 5 15
+ // UTF-32 idx: 0 5 15
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(2),
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("Hello"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(5), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("Hello"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(15), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(15), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermEndingBefore(17), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBeforeUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't use whitespace as a word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kChinese));
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // Bytes: 0 3 9 15 18
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF-8 idx: 0 3 9 15 18
+ // UTF-32 idx: 0 1 3 5 6
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("我"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(15)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq("去"));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBeforeUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kJapanese));
- // String: "私は毎日仕事に歩いています。"
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 6 12 18212427 33
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "私は毎日仕事に歩いています。"
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 6 12 18212427 33
+ // UTF-32 idx: 0 1 2 4 6 7 8 9 11
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(33), IsOkAndHolds(Eq(27)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(9)));
EXPECT_THAT(itr->GetTerm(), Eq("てい"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(3)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(3), IsOkAndHolds(Eq(1)));
EXPECT_THAT(itr->GetTerm(), Eq("は"));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBeforeUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kKhmer));
- // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
- // ^ ^ ^ ^
- // Bytes: 0 9 24 45
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
+ // ^ ^ ^ ^
+ // UTF-8 idx: 0 9 24 45
+ // UTF-32 idx: 0 3 8 15
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(47), IsOkAndHolds(Eq(24)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(16), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("ញុំ"));
}
-TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create(GetOptions()));
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBeforeUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kThai));
- // String: "ฉันเดินไปทำงานทุกวัน"
- // ^ ^ ^ ^ ^ ^
- // Bytes: 0 9 21 27 42 51
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "ฉันเดินไปทำงานทุกวัน"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 9 21 27 42 51
+ // UTF-32 idx: 0 3 7 9 14 17
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(51), IsOkAndHolds(Eq(42)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(14)));
EXPECT_THAT(itr->GetTerm(), Eq("ทุก"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(13), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(4), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("ฉัน"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(34), IsOkAndHolds(Eq(21)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(7)));
EXPECT_THAT(itr->GetTerm(), Eq("ไป"));
}
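The UTF-8 and UTF-32 index tables in the comments above track the same term
boundaries in two units: byte offsets versus code point offsets. A minimal
standalone sketch of that mapping (the helper name is illustrative, not part
of the icing API):

#include <cstdint>
#include <string_view>

// Returns the UTF-32 (code point) offset for a UTF-8 byte offset by counting
// UTF-8 lead bytes; continuation bytes match the bit pattern 10xxxxxx.
int32_t Utf8ToUtf32Offset(std::string_view utf8_text, int32_t utf8_offset) {
  int32_t utf32_offset = 0;
  for (int32_t i = 0;
       i < utf8_offset && i < static_cast<int32_t>(utf8_text.size()); ++i) {
    if ((static_cast<uint8_t>(utf8_text[i]) & 0xC0) != 0x80) {
      ++utf32_offset;
    }
  }
  return utf32_offset;
}

// For the Chinese sample above, Utf8ToUtf32Offset(kChinese, 15) returns 5,
// matching the index table in ChineseResetToTermBeforeUtf32.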
+TEST_P(IcuLanguageSegmenterAllLocalesTest, QuerySyntax) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+  // The returned terms are string_views into the input, so the input string
+  // is not copied; validates how query syntax characters are segmented.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<std::string_view> terms,
+ language_segmenter->GetAllTerms(
+ "(-term1 OR term2) AND property1.subproperty2:term3"));
+ EXPECT_THAT(terms, ElementsAre("(", "-", "term1", " ", "OR", " ", "term2",
+ ")", " ", "AND", " ", "property1", ".",
+ "subproperty2", ":", "term3"));
+}
+
+TEST_P(IcuLanguageSegmenterAllLocalesTest, MultipleLangSegmentersTest) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator_one,
+ language_segmenter->Segment("foo bar baz"));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> iterator_two,
+ language_segmenter->Segment("abra kadabra alakazam"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("foo"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("abra"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq(" "));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("bar"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("bar"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("kadabra"));
+
+ ASSERT_TRUE(iterator_one->Advance());
+ ASSERT_TRUE(iterator_two->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq(" "));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq(" "));
+
+ ASSERT_TRUE(iterator_two->Advance());
+ ASSERT_TRUE(iterator_one->Advance());
+ EXPECT_THAT(iterator_one->GetTerm(), Eq("baz"));
+ EXPECT_THAT(iterator_two->GetTerm(), Eq("alakazam"));
+
+ ASSERT_FALSE(iterator_two->Advance());
+ ASSERT_FALSE(iterator_one->Advance());
+}
+
INSTANTIATE_TEST_SUITE_P(
LocaleName, IcuLanguageSegmenterAllLocalesTest,
testing::Values(ULOC_US, ULOC_UK, ULOC_CANADA, ULOC_CANADA_FRENCH,
@@ -1011,6 +1360,5 @@ INSTANTIATE_TEST_SUITE_P(
"" // Will fall back to ICU default locale
));
-} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/tokenization/language-segmenter-factory.h b/icing/tokenization/language-segmenter-factory.h
index ce50d0b..2505a07 100644
--- a/icing/tokenization/language-segmenter-factory.h
+++ b/icing/tokenization/language-segmenter-factory.h
@@ -18,11 +18,9 @@
#include <memory>
#include <string_view>
-#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/jni/jni-cache.h"
#include "icing/tokenization/language-segmenter.h"
-#include "icing/util/i18n-utils.h"
-#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -30,7 +28,7 @@ namespace lib {
namespace language_segmenter_factory {
struct SegmenterOptions {
- explicit SegmenterOptions(std::string locale = ULOC_US,
+ explicit SegmenterOptions(std::string locale,
const JniCache* jni_cache = nullptr)
: locale(std::move(locale)), jni_cache(jni_cache) {}
@@ -46,7 +44,7 @@ struct SegmenterOptions {
// A LanguageSegmenter on success
// INVALID_ARGUMENT if locale string is invalid
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
- SegmenterOptions options = SegmenterOptions());
+ SegmenterOptions options);
} // namespace language_segmenter_factory
diff --git a/icing/tokenization/language-segmenter-iterator-test-jni-layer.cc b/icing/tokenization/language-segmenter-iterator-test-jni-layer.cc
new file mode 100644
index 0000000..3a94af3
--- /dev/null
+++ b/icing/tokenization/language-segmenter-iterator-test-jni-layer.cc
@@ -0,0 +1,37 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include "gtest/gtest.h"
+#include "icing/testing/logging-event-listener.h"
+
+// Global variable used so that the test implementation can access the JNIEnv.
+JNIEnv* g_jenv = nullptr;
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_icing_jni_LanguageSegmenterIteratorJniTest_testsMain(JNIEnv* env,
+ jclass ignored) {
+ g_jenv = env;
+
+ std::vector<char*> my_argv;
+ char arg[] = "jni-test-lib";
+ my_argv.push_back(arg);
+ int argc = 1;
+ char** argv = &(my_argv[0]);
+ testing::InitGoogleTest(&argc, argv);
+ testing::UnitTest::GetInstance()->listeners().Append(
+ new icing::lib::LoggingEventListener());
+ return RUN_ALL_TESTS() == 0;
+}
diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc
index c7b068d..3aff45c 100644
--- a/icing/tokenization/language-segmenter-iterator_test.cc
+++ b/icing/tokenization/language-segmenter-iterator_test.cc
@@ -15,8 +15,10 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
@@ -35,16 +37,23 @@ using ::testing::Eq;
class LanguageSegmenterIteratorTest : public testing::Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
}
+
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
};
TEST_F(LanguageSegmenterIteratorTest, AdvanceAndGetTerm) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
language_segmenter->Segment("foo bar"));
@@ -61,111 +70,135 @@ TEST_F(LanguageSegmenterIteratorTest, AdvanceAndGetTerm) {
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermStartingAfterWithOffsetInText) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ ResetToTermStartingAfterUtf32WithOffsetInText) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
language_segmenter->Segment("foo bar"));
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/0),
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/0),
IsOkAndHolds(3)); // The term " "
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/3),
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/3),
IsOkAndHolds(4)); // The term "bar"
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/4),
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/4),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermStartingAfterWithNegativeOffsetNotOk) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ ResetToTermStartingAfterUtf32WithNegativeOffsetNotOk) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
language_segmenter->Segment("foo bar"));
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/-1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/-1), IsOk());
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/-100),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/-100), IsOk());
- EXPECT_THAT(iterator->ResetToStart(), IsOkAndHolds(0));
+ EXPECT_THAT(iterator->ResetToStartUtf32(), IsOkAndHolds(0));
EXPECT_THAT(iterator->GetTerm(), Eq("foo"));
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermStartingAfterWithTextLengthOffsetInvalidArgument) {
+ ResetToTermStartingAfterUtf32WithTextLengthOffsetInvalidArgument) {
std::string text = "foo bar";
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text));
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/text.size()),
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/text.length()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermStartingAfterWithOffsetPastTextLengthInvalidArgument) {
+ ResetToTermStartingAfterUtf32WithOffsetPastTextLengthInvalidArgument) {
std::string text = "foo bar";
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text));
- EXPECT_THAT(iterator->ResetToTermStartingAfter(/*offset=*/100),
+ EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/100),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(LanguageSegmenterIteratorTest, ResetToTermEndingBeforeWithOffsetInText) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(LanguageSegmenterIteratorTest,
+ ResetToTermEndingBeforeUtf32WithOffsetInText) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
language_segmenter->Segment("foo bar"));
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/6),
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/6),
IsOkAndHolds(3)); // The term " "
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/3),
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/3),
IsOkAndHolds(0)); // The term "foo"
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/2),
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/2),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermEndingBeforeWithZeroNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ ResetToTermEndingBeforeUtf32WithZeroNotFound) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
language_segmenter->Segment("foo bar"));
// Zero is a valid argument, but there aren't any terms that end before it.
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/0),
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermEndingBeforeWithNegativeOffsetInvalidArgument) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ ResetToTermEndingBeforeUtf32WithNegativeOffsetInvalidArgument) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
language_segmenter->Segment("foo bar"));
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/-1),
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/-1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/-100),
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/-100),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermEndingBeforeWithOffsetPastTextEndInvalidArgument) {
+ ResetToTermEndingBeforeUtf32WithOffsetPastTextEndInvalidArgument) {
std::string text = "foo bar";
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text));
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/text.length()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/text.length()),
+ IsOk());
- EXPECT_THAT(iterator->ResetToTermEndingBefore(/*offset=*/text.length() + 1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(
+ iterator->ResetToTermEndingBeforeUtf32(/*offset=*/text.length() + 1),
+ IsOk());
}
} // namespace
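The revised expectations above loosen the boundary checks on the Utf32 reset
methods: negative offsets to ResetToTermStartingAfterUtf32 and past-the-end
offsets to ResetToTermEndingBeforeUtf32 now succeed, while each method still
rejects the opposite extreme (offsets at or past the end, and negative
offsets, respectively). A minimal sketch of the implied clamping (these
helper names are assumptions, not part of the icing API):

#include <cstdint>

// Any negative offset behaves like "just before the first code point", so
// the reset lands on the first term.
int32_t ClampStartingAfterOffset(int32_t utf32_offset) {
  return utf32_offset < 0 ? -1 : utf32_offset;
}

// Any offset past the end behaves like "at the end of the text", so the
// reset lands on the last term.
int32_t ClampEndingBeforeOffset(int32_t utf32_offset, int32_t utf32_length) {
  return utf32_offset > utf32_length ? utf32_length : utf32_offset;
}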
diff --git a/icing/tokenization/language-segmenter.h b/icing/tokenization/language-segmenter.h
index fdb1846..913386a 100644
--- a/icing/tokenization/language-segmenter.h
+++ b/icing/tokenization/language-segmenter.h
@@ -21,6 +21,8 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/util/character-iterator.h"
namespace icing {
namespace lib {
@@ -56,53 +58,100 @@ class LanguageSegmenter {
// true.
virtual std::string_view GetTerm() const = 0;
- // Resets the iterator to point to the first term that starts after offset.
+ // RETURNS:
+ // On success, a CharacterIterator pointing to the beginning of the
+ // current term.
+ // ABORTED if an invalid unicode character is encountered while
+ // calculating the term start.
+ virtual libtextclassifier3::StatusOr<CharacterIterator>
+ CalculateTermStart() {
+ return absl_ports::UnimplementedError("");
+ }
+
+ // RETURNS:
+ // On success, a CharacterIterator pointing just past the end of the
+ // current term.
+ // ABORTED if an invalid unicode character is encountered while
+ // calculating the term end.
+ virtual libtextclassifier3::StatusOr<CharacterIterator>
+ CalculateTermEndExclusive() {
+ return absl_ports::UnimplementedError("");
+ }
+
+  // Resets the iterator to point to the first term that starts after the
+  // given UTF-32 offset.
// GetTerm will now return that term. For example:
//
// language_segmenter = language_segmenter_factory::Create(type);
// iterator = language_segmenter->Segment("foo bar baz");
- // iterator.ResetToTermStartingAfter(4);
+ // iterator.ResetToTermStartingAfterUtf32(4);
// iterator.GetTerm() // returns "baz";
//
// Return types of OK and NOT_FOUND indicate that the function call was
// valid and the state of the iterator has changed. Return type of
- // INVALID_ARGUMENT will leave the iterator unchanged.
+ // INVALID_ARGUMENT will leave the iterator unchanged. Lastly, a return type
+ // of ABORTED means that the iterator may be left in an undefined state and
+ // no longer be usable.
//
// Returns:
- // On success, the starting position of the first term that starts after
+ // On success, the UTF-32 offset of the first term that starts after
// offset.
// NOT_FOUND if an error occurred or there are no terms that start after
// offset.
- // INVALID_ARGUMENT if offset is out of bounds for the provided text.
+ // INVALID_ARGUMENT if offset is beyond the end of the text.
// ABORTED if an invalid unicode character is encountered while
// traversing the text.
- virtual libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter(
- int32_t offset) = 0;
+ virtual libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfterUtf32(
+ int32_t offset) {
+ return absl_ports::UnimplementedError("");
+ }
- // Resets the iterator to point to the first term that ends before offset.
+  // Resets the iterator to point to the first term that ends before the
+  // given UTF-32 offset.
// GetTerm will now return that term. For example:
//
// language_segmenter = language_segmenter_factory::Create(type);
// iterator = language_segmenter->Segment("foo bar baz");
- // iterator.ResetToTermEndingBefore(7);
+ // iterator.ResetToTermEndingBeforeUtf32(7);
// iterator.GetTerm() // returns "bar";
//
// Return types of OK and NOT_FOUND indicate that the function call was
// valid and the state of the iterator has changed. Return type of
- // INVALID_ARGUMENT will leave the iterator unchanged.
+ // INVALID_ARGUMENT will leave the iterator unchanged. Lastly, a return type
+ // of ABORTED means that the iterator may be left in an undefined state and
+ // no longer be usable.
//
// Returns:
- // On success, the starting position of the first term that ends before
+ // On success, the UTF-32 offset of the first term that ends before
// offset.
  //   NOT_FOUND if an error occurred or there are no terms that end before
// offset.
- // INVALID_ARGUMENT if offset is out of bounds for the provided text.
+  //   INVALID_ARGUMENT if offset is negative.
// ABORTED if an invalid unicode character is encountered while
// traversing the text.
- virtual libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore(
- int32_t offset) = 0;
+ virtual libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBeforeUtf32(
+ int32_t offset) {
+ return absl_ports::UnimplementedError("");
+ }
- virtual libtextclassifier3::StatusOr<int32_t> ResetToStart() = 0;
+ // Resets the iterator to point to the first term.
+ // GetTerm will now return that term. For example:
+ //
+ // language_segmenter = language_segmenter_factory::Create(type);
+ // iterator = language_segmenter->Segment("foo bar baz");
+ // iterator.Advance();
+ // iterator.ResetToStartUtf32();
+ // iterator.GetTerm() // returns "foo";
+ //
+ // Return types of OK and NOT_FOUND indicate that the function call was
+ // valid and the state of the iterator has changed.
+ //
+ // Returns:
+ // On success, the starting position of the first term.
+ // NOT_FOUND if an error occurred or there are no valid terms in the text.
+ // ABORTED if an invalid unicode character is encountered while
+ // traversing the text.
+ virtual libtextclassifier3::StatusOr<int32_t> ResetToStartUtf32() = 0;
};
// Segments the input text into terms.
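Pulling the documented examples together, a minimal usage sketch of the Utf32
reset API (error handling elided; the returned terms are taken from the doc
comments above):

// Sketch only: assumes language_segmenter was created via
// language_segmenter_factory::Create as documented above.
auto iterator = language_segmenter->Segment("foo bar baz").ValueOrDie();
iterator->ResetToTermStartingAfterUtf32(4);  // GetTerm() now returns "baz".
iterator->ResetToTermEndingBeforeUtf32(7);   // GetTerm() now returns "bar".
iterator->ResetToStartUtf32();               // GetTerm() now returns "foo".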
diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc
index 49ddfca..748a322 100644
--- a/icing/tokenization/language-segmenter_benchmark.cc
+++ b/icing/tokenization/language-segmenter_benchmark.cc
@@ -14,19 +14,20 @@
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
// Run on a Linux workstation:
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing/tokenization:language-segmenter_benchmark
//
// $ blaze-bin/icing/tokenization/language-segmenter_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
@@ -40,7 +41,7 @@
// blaze-bin/icing/tokenization/language-segmenter_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmark_filter=all
// --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
@@ -59,8 +60,9 @@ void BM_SegmentNoSpace(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::string input_string(state.range(0), 'A');
@@ -95,8 +97,9 @@ void BM_SegmentWithSpaces(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::string input_string(state.range(0), 'A');
for (int i = 1; i < input_string.length(); i += 2) {
@@ -134,8 +137,9 @@ void BM_SegmentCJK(benchmark::State& state) {
GetTestFilePath("icing/icu.dat")));
}
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
- language_segmenter_factory::Create().ValueOrDie();
+ language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::string input_string;
while (input_string.length() < state.range(0)) {
diff --git a/icing/tokenization/plain-tokenizer-test-jni-layer.cc b/icing/tokenization/plain-tokenizer-test-jni-layer.cc
new file mode 100644
index 0000000..efa6427
--- /dev/null
+++ b/icing/tokenization/plain-tokenizer-test-jni-layer.cc
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include "gtest/gtest.h"
+#include "icing/testing/logging-event-listener.h"
+
+// Global variable used so that the test implementation can access the JNIEnv.
+JNIEnv* g_jenv = nullptr;
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_icing_jni_PlainTokenizerJniTest_testsMain(JNIEnv* env, jclass ignored) {
+ g_jenv = env;
+
+ std::vector<char*> my_argv;
+ char arg[] = "jni-test-lib";
+ my_argv.push_back(arg);
+ int argc = 1;
+ char** argv = &(my_argv[0]);
+ testing::InitGoogleTest(&argc, argv);
+ testing::UnitTest::GetInstance()->listeners().Append(
+ new icing::lib::LoggingEventListener());
+ return RUN_ALL_TESTS() == 0;
+}
diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc
index 6e54af9..d40022b 100644
--- a/icing/tokenization/plain-tokenizer.cc
+++ b/icing/tokenization/plain-tokenizer.cc
@@ -14,10 +14,13 @@
#include "icing/tokenization/plain-tokenizer.h"
+#include <algorithm>
#include <cstdint>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
#include "icing/util/status-macros.h"
@@ -63,15 +66,26 @@ class PlainTokenIterator : public Tokenizer::Iterator {
return found_next_valid_term;
}
- Token GetToken() const override {
- if (current_term_.empty()) {
- return Token(Token::INVALID);
+ std::vector<Token> GetTokens() const override {
+ std::vector<Token> result;
+ if (!current_term_.empty()) {
+ result.push_back(Token(Token::Type::REGULAR, current_term_));
}
- return Token(Token::REGULAR, current_term_);
+ return result;
}
- bool ResetToTokenAfter(int32_t offset) override {
- if (!base_iterator_->ResetToTermStartingAfter(offset).ok()) {
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart()
+ override {
+ return base_iterator_->CalculateTermStart();
+ }
+
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenEndExclusive()
+ override {
+ return base_iterator_->CalculateTermEndExclusive();
+ }
+
+ bool ResetToTokenStartingAfter(int32_t utf32_offset) override {
+ if (!base_iterator_->ResetToTermStartingAfterUtf32(utf32_offset).ok()) {
return false;
}
current_term_ = base_iterator_->GetTerm();
@@ -82,22 +96,24 @@ class PlainTokenIterator : public Tokenizer::Iterator {
return true;
}
- bool ResetToTokenBefore(int32_t offset) override {
+ bool ResetToTokenEndingBefore(int32_t utf32_offset) override {
ICING_ASSIGN_OR_RETURN(
- offset, base_iterator_->ResetToTermEndingBefore(offset), false);
+ utf32_offset,
+ base_iterator_->ResetToTermEndingBeforeUtf32(utf32_offset), false);
current_term_ = base_iterator_->GetTerm();
while (!IsValidTerm(current_term_)) {
// Haven't found a valid term yet. Retrieve the term prior to this one
// from the segmenter.
ICING_ASSIGN_OR_RETURN(
- offset, base_iterator_->ResetToTermEndingBefore(offset), false);
+ utf32_offset,
+ base_iterator_->ResetToTermEndingBeforeUtf32(utf32_offset), false);
current_term_ = base_iterator_->GetTerm();
}
return true;
}
bool ResetToStart() override {
- if (!base_iterator_->ResetToStart().ok()) {
+ if (!base_iterator_->ResetToStartUtf32().ok()) {
return false;
}
current_term_ = base_iterator_->GetTerm();
@@ -127,7 +143,8 @@ libtextclassifier3::StatusOr<std::vector<Token>> PlainTokenizer::TokenizeAll(
Tokenize(text));
std::vector<Token> tokens;
while (iterator->Advance()) {
- tokens.push_back(iterator->GetToken());
+ std::vector<Token> batch_tokens = iterator->GetTokens();
+ tokens.insert(tokens.end(), batch_tokens.begin(), batch_tokens.end());
}
return tokens;
}
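With GetToken() replaced by the batch-oriented GetTokens(), callers
accumulate the tokens produced by each Advance(), exactly as TokenizeAll does
above. A minimal caller-side sketch (setup elided; PlainTokenizer yields at
most one token per batch, but other tokenizers may yield several):

// Sketch only: plain_tokenizer is assumed to be created via
// tokenizer_factory::CreateIndexingTokenizer as in the tests below.
ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
                       plain_tokenizer->Tokenize("Hello World"));
std::vector<Token> all_tokens;
while (iterator->Advance()) {
  std::vector<Token> batch = iterator->GetTokens();
  all_tokens.insert(all_tokens.end(), batch.begin(), batch.end());
}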
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
index f2fc678..6c426da 100644
--- a/icing/tokenization/plain-tokenizer_test.cc
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -18,12 +18,15 @@
#include "gmock/gmock.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/icu-i18n-test-utils.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -34,59 +37,111 @@ using ::testing::IsEmpty;
class PlainTokenizerTest : public ::testing::Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
}
+
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
};
TEST_F(PlainTokenizerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, /*lang_segmenter=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ /*lang_segmenter=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(PlainTokenizerTest, NoTokensBeforeAdvancing) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator,
+ plain_tokenizer->Tokenize(kText));
+
+  // GetTokens() should return nothing when called before the first Advance().
+ EXPECT_THAT(token_iterator->GetTokens(), IsEmpty());
+}
+
+TEST_F(PlainTokenizerTest, LastTokenAfterFullyAdvanced) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator,
+ plain_tokenizer->Tokenize(kText));
+
+ while (token_iterator->Advance()) {}
+
+  // After Advance() returns false, GetTokens() keeps returning the last token.
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "!")));
}
TEST_F(PlainTokenizerTest, Simple) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
EXPECT_THAT(plain_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty()));
- EXPECT_THAT(plain_tokenizer->TokenizeAll("Hello World"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("Hello World"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"))));
EXPECT_THAT(
plain_tokenizer->TokenizeAll(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
"Duis efficitur iaculis auctor."),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Lorem"),
- EqualsToken(Token::REGULAR, "ipsum"),
- EqualsToken(Token::REGULAR, "dolor"),
- EqualsToken(Token::REGULAR, "sit"),
- EqualsToken(Token::REGULAR, "amet"),
- EqualsToken(Token::REGULAR, "consectetur"),
- EqualsToken(Token::REGULAR, "adipiscing"),
- EqualsToken(Token::REGULAR, "elit"),
- EqualsToken(Token::REGULAR, "Duis"),
- EqualsToken(Token::REGULAR, "efficitur"),
- EqualsToken(Token::REGULAR, "iaculis"),
- EqualsToken(Token::REGULAR, "auctor"))));
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Lorem"),
+ EqualsToken(Token::Type::REGULAR, "ipsum"),
+ EqualsToken(Token::Type::REGULAR, "dolor"),
+ EqualsToken(Token::Type::REGULAR, "sit"),
+ EqualsToken(Token::Type::REGULAR, "amet"),
+ EqualsToken(Token::Type::REGULAR, "consectetur"),
+ EqualsToken(Token::Type::REGULAR, "adipiscing"),
+ EqualsToken(Token::Type::REGULAR, "elit"),
+ EqualsToken(Token::Type::REGULAR, "Duis"),
+ EqualsToken(Token::Type::REGULAR, "efficitur"),
+ EqualsToken(Token::Type::REGULAR, "iaculis"),
+ EqualsToken(Token::Type::REGULAR, "auctor"))));
}
TEST_F(PlainTokenizerTest, Whitespace) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
  // There are many Unicode whitespace characters; here we use tabs to stand
  // in for the rest.
@@ -94,168 +149,249 @@ TEST_F(PlainTokenizerTest, Whitespace) {
  // 0x0009 is a horizontal tab, which is considered whitespace
std::string text_with_horizontal_tab =
absl_ports::StrCat("Hello", UCharToString(0x0009), "World");
- EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_horizontal_tab),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll(text_with_horizontal_tab),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"))));
  // 0x000B is a vertical tab, which is considered whitespace
std::string text_with_vertical_tab =
absl_ports::StrCat("Hello", UCharToString(0x000B), "World");
- EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_vertical_tab),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll(text_with_vertical_tab),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"))));
}
TEST_F(PlainTokenizerTest, Punctuation) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
// Half-width punctuation marks are filtered out.
- EXPECT_THAT(plain_tokenizer->TokenizeAll(
- "Hello, World! Hello: World. \"Hello\" World?"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"),
- EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"),
- EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll(
+ "Hello, World! Hello: World. \"Hello\" World?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"),
+ EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"),
+ EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"))));
// Full-width punctuation marks are filtered out.
- EXPECT_THAT(
- plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你好"),
- EqualsToken(Token::REGULAR, "世界"),
- EqualsToken(Token::REGULAR, "你好"),
- EqualsToken(Token::REGULAR, "世界"),
- EqualsToken(Token::REGULAR, "你好"),
- EqualsToken(Token::REGULAR, "世界"))));
+ std::vector<std::string_view> exp_tokens;
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "你"),
+ EqualsToken(Token::Type::REGULAR, "好"),
+ EqualsToken(Token::Type::REGULAR, "世界"),
+ EqualsToken(Token::Type::REGULAR, "你"),
+ EqualsToken(Token::Type::REGULAR, "好"),
+ EqualsToken(Token::Type::REGULAR, "世界"),
+ EqualsToken(Token::Type::REGULAR, "你"),
+ EqualsToken(Token::Type::REGULAR, "好"),
+ EqualsToken(Token::Type::REGULAR, "世界"))));
+ } else {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "你好"),
+ EqualsToken(Token::Type::REGULAR, "世界"),
+ EqualsToken(Token::Type::REGULAR, "你好"),
+ EqualsToken(Token::Type::REGULAR, "世界"),
+ EqualsToken(Token::Type::REGULAR, "你好"),
+ EqualsToken(Token::Type::REGULAR, "世界"))));
+ }
}
TEST_F(PlainTokenizerTest, SpecialCharacters) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
  // Right now we don't have special logic for these characters; we just
  // output them as tokens.
- EXPECT_THAT(plain_tokenizer->TokenizeAll("1+1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "1"),
- EqualsToken(Token::REGULAR, "+"),
- EqualsToken(Token::REGULAR, "1"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("1+1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "1"),
+ EqualsToken(Token::Type::REGULAR, "+"),
+ EqualsToken(Token::Type::REGULAR, "1"))));
- EXPECT_THAT(plain_tokenizer->TokenizeAll("$50"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "$"),
- EqualsToken(Token::REGULAR, "50"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("$50"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "$"),
+ EqualsToken(Token::Type::REGULAR, "50"))));
}
TEST_F(PlainTokenizerTest, CJKT) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
-
  // In the plain tokenizer, CJKT characters are handled the same way as
  // non-CJKT characters; these tests are just sanity checks.
-
// Chinese
- EXPECT_THAT(plain_tokenizer->TokenizeAll("我每天走路去上班。"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "我"),
- EqualsToken(Token::REGULAR, "每天"),
- EqualsToken(Token::REGULAR, "走路"),
- EqualsToken(Token::REGULAR, "去"),
- EqualsToken(Token::REGULAR, "上班"))));
- // Japanese
+ language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
EXPECT_THAT(
- plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "私"), EqualsToken(Token::REGULAR, "は"),
- EqualsToken(Token::REGULAR, "毎日"),
- EqualsToken(Token::REGULAR, "仕事"),
- EqualsToken(Token::REGULAR, "に"), EqualsToken(Token::REGULAR, "歩"),
- EqualsToken(Token::REGULAR, "い"),
- EqualsToken(Token::REGULAR, "てい"),
- EqualsToken(Token::REGULAR, "ます"))));
+ plain_tokenizer->TokenizeAll("我每天走路去上班。"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "我"),
+ EqualsToken(Token::Type::REGULAR, "每天"),
+ EqualsToken(Token::Type::REGULAR, "走路"),
+ EqualsToken(Token::Type::REGULAR, "去"),
+ EqualsToken(Token::Type::REGULAR, "上班"))));
+ // Japanese
+ options = language_segmenter_factory::SegmenterOptions(ULOC_JAPANESE,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "私"),
+ EqualsToken(Token::Type::REGULAR, "は"),
+ EqualsToken(Token::Type::REGULAR, "毎日"),
+ EqualsToken(Token::Type::REGULAR, "仕事"),
+ EqualsToken(Token::Type::REGULAR, "に"),
+ EqualsToken(Token::Type::REGULAR, "歩い"),
+ EqualsToken(Token::Type::REGULAR, "て"),
+ EqualsToken(Token::Type::REGULAR, "い"),
+ EqualsToken(Token::Type::REGULAR, "ます"))));
+ } else {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "私"),
+ EqualsToken(Token::Type::REGULAR, "は"),
+ EqualsToken(Token::Type::REGULAR, "毎日"),
+ EqualsToken(Token::Type::REGULAR, "仕事"),
+ EqualsToken(Token::Type::REGULAR, "に"),
+ EqualsToken(Token::Type::REGULAR, "歩"),
+ EqualsToken(Token::Type::REGULAR, "い"),
+ EqualsToken(Token::Type::REGULAR, "てい"),
+ EqualsToken(Token::Type::REGULAR, "ます"))));
+ }
+
// Khmer
- EXPECT_THAT(plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ញុំ"),
- EqualsToken(Token::REGULAR, "ដើរទៅ"),
- EqualsToken(Token::REGULAR, "ធ្វើការ"),
- EqualsToken(Token::REGULAR, "រាល់ថ្ងៃ"))));
- // Korean
EXPECT_THAT(
- plain_tokenizer->TokenizeAll("나는 매일 출근합니다."),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "나는"),
- EqualsToken(Token::REGULAR, "매일"),
- EqualsToken(Token::REGULAR, "출근합니다"))));
+ plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "ញុំ"),
+ EqualsToken(Token::Type::REGULAR, "ដើរទៅ"),
+ EqualsToken(Token::Type::REGULAR, "ធ្វើការ"),
+ EqualsToken(Token::Type::REGULAR, "រាល់ថ្ងៃ"))));
+ // Korean
+ EXPECT_THAT(plain_tokenizer->TokenizeAll("나는 매일 출근합니다."),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::REGULAR, "나는"),
+ EqualsToken(Token::Type::REGULAR, "매일"),
+ EqualsToken(Token::Type::REGULAR, "출근합니다"))));
// Thai
- EXPECT_THAT(plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"),
- EqualsToken(Token::REGULAR, "เดิน"),
- EqualsToken(Token::REGULAR, "ไป"),
- EqualsToken(Token::REGULAR, "ทำงาน"),
- EqualsToken(Token::REGULAR, "ทุก"),
- EqualsToken(Token::REGULAR, "วัน"))));
+  // DIFFERENCE: the CFString and ICU tokenizers disagree over how to segment
+  // "ทุกวัน" (iOS groups it into a single term). This difference persists even
+  // when the locale is set to THAI.
+ if (IsCfStringTokenization()) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<Token> tokens,
+ plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"));
+
+ EXPECT_THAT(tokens, ElementsAre(EqualsToken(Token::Type::REGULAR, "ฉัน"),
+ EqualsToken(Token::Type::REGULAR, "เดิน"),
+ EqualsToken(Token::Type::REGULAR, "ไป"),
+ EqualsToken(Token::Type::REGULAR, "ทำงาน"),
+ EqualsToken(Token::Type::REGULAR, "ทุกวัน")));
+ } else {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "ฉัน"),
+ EqualsToken(Token::Type::REGULAR, "เดิน"),
+ EqualsToken(Token::Type::REGULAR, "ไป"),
+ EqualsToken(Token::Type::REGULAR, "ทำงาน"),
+ EqualsToken(Token::Type::REGULAR, "ทุก"),
+ EqualsToken(Token::Type::REGULAR, "วัน"))));
+ }
}
-TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(PlainTokenizerTest, ResetToTokenStartingAfterSimple) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = "f b";
auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
- EXPECT_TRUE(iterator->ResetToTokenAfter(0));
- EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "b"));
+ EXPECT_TRUE(iterator->ResetToTokenStartingAfter(0));
+ EXPECT_THAT(iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "b")));
- EXPECT_FALSE(iterator->ResetToTokenAfter(2));
+ EXPECT_FALSE(iterator->ResetToTokenStartingAfter(2));
}
-TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(PlainTokenizerTest, ResetToTokenEndingBeforeSimple) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = "f b";
auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
- EXPECT_TRUE(iterator->ResetToTokenBefore(2));
- EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "f"));
+ EXPECT_TRUE(iterator->ResetToTokenEndingBefore(2));
+ EXPECT_THAT(iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "f")));
- EXPECT_FALSE(iterator->ResetToTokenBefore(0));
+ EXPECT_FALSE(iterator->ResetToTokenEndingBefore(0));
}
-TEST_F(PlainTokenizerTest, ResetToTokenAfter) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = " foo . bar baz.. bat ";
- EXPECT_THAT(plain_tokenizer->TokenizeAll(kText),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"),
- EqualsToken(Token::REGULAR, "bar"),
- EqualsToken(Token::REGULAR, "baz"),
- EqualsToken(Token::REGULAR, "bat"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll(kText),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"),
+ EqualsToken(Token::Type::REGULAR, "bar"),
+ EqualsToken(Token::Type::REGULAR, "baz"),
+ EqualsToken(Token::Type::REGULAR, "bat"))));
std::vector<std::string> expected_text = {
"foo", // 0: " foo . bar"
"bar", // 1: "foo . bar "
@@ -278,32 +414,38 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfter) {
auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
EXPECT_TRUE(iterator->Advance());
- EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo"));
+ EXPECT_THAT(iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "foo")));
for (int i = 0; i < kText.length(); ++i) {
if (i < expected_text.size()) {
- EXPECT_TRUE(iterator->ResetToTokenAfter(i));
- EXPECT_THAT(iterator->GetToken(),
- EqualsToken(Token::REGULAR, expected_text[i]));
+ EXPECT_TRUE(iterator->ResetToTokenStartingAfter(i));
+ EXPECT_THAT(
+ iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR, expected_text[i])));
} else {
- EXPECT_FALSE(iterator->ResetToTokenAfter(i));
+ EXPECT_FALSE(iterator->ResetToTokenStartingAfter(i));
}
}
}
-TEST_F(PlainTokenizerTest, ResetToTokenBefore) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = " foo . bar baz.. bat ";
- EXPECT_THAT(plain_tokenizer->TokenizeAll(kText),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"),
- EqualsToken(Token::REGULAR, "bar"),
- EqualsToken(Token::REGULAR, "baz"),
- EqualsToken(Token::REGULAR, "bat"))));
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll(kText),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"),
+ EqualsToken(Token::Type::REGULAR, "bar"),
+ EqualsToken(Token::Type::REGULAR, "baz"),
+ EqualsToken(Token::Type::REGULAR, "bat"))));
std::vector<std::string> expected_text = {
"bat", // 20: "baz.. bat "
"baz", // 19: " baz.. bat"
@@ -326,15 +468,17 @@ TEST_F(PlainTokenizerTest, ResetToTokenBefore) {
auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
EXPECT_TRUE(iterator->Advance());
- EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo"));
+ EXPECT_THAT(iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "foo")));
for (int i = kText.length() - 1; i >= 0; --i) {
int expected_index = kText.length() - 1 - i;
if (expected_index < expected_text.size()) {
- EXPECT_TRUE(iterator->ResetToTokenBefore(i));
- EXPECT_THAT(iterator->GetToken(),
- EqualsToken(Token::REGULAR, expected_text[expected_index]));
+ EXPECT_TRUE(iterator->ResetToTokenEndingBefore(i));
+ EXPECT_THAT(iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::REGULAR,
+ expected_text[expected_index])));
} else {
- EXPECT_FALSE(iterator->ResetToTokenBefore(i));
+ EXPECT_FALSE(iterator->ResetToTokenEndingBefore(i));
}
}
}
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
index 8b2edc9..1dcbf9b 100644
--- a/icing/tokenization/raw-query-tokenizer.cc
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -14,9 +14,8 @@
#include "icing/tokenization/raw-query-tokenizer.h"
-#include <stddef.h>
-
#include <cctype>
+#include <cstddef>
#include <memory>
#include <string>
#include <string_view>
@@ -26,6 +25,9 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/schema-util.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer.h"
@@ -70,7 +72,7 @@ constexpr char kColon = ':';
constexpr char kLeftParentheses = '(';
constexpr char kRightParentheses = ')';
constexpr char kExclusion = '-';
-constexpr char kOrOperator[] = "OR";
+constexpr std::string_view kOrOperator = "OR";
enum State {
// Ready to process any terms
@@ -100,10 +102,14 @@ enum State {
// When seeing right parentheses
CLOSING_PARENTHESES = 8,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM = 9,
+
+ PROCESSING_PROPERTY_TERM_APPENDING = 10,
+
// Valid state count
- STATE_COUNT = 9,
+ STATE_COUNT = 11,
- INVALID = 10
+ INVALID = 12
};
enum TermType {
@@ -111,27 +117,29 @@ enum TermType {
WHITESPACE = 0,
// A term that consists of unicode alphabetic and numeric characters
- ALPHANUMERIC_TERM = 1,
+ ASCII_ALPHANUMERIC_TERM = 1,
+
+ NON_ASCII_ALPHANUMERIC_TERM = 2,
// "("
- LEFT_PARENTHESES = 2,
+ LEFT_PARENTHESES = 3,
// ")"
- RIGHT_PARENTHESES = 3,
+ RIGHT_PARENTHESES = 4,
// "-"
- EXCLUSION_OPERATOR = 4,
+ EXCLUSION_OPERATOR = 5,
// "OR"
- OR_OPERATOR = 5,
+ OR_OPERATOR = 6,
// ":"
- COLON = 6,
+ COLON = 7,
// All the other characters seen that are not the types above
- OTHER = 7,
+ OTHER = 8,
- TYPE_COUNT = 8
+ TYPE_COUNT = 9
};
enum ActionOrError {
@@ -145,6 +153,9 @@ enum ActionOrError {
// Ignore / throw away the current term
IGNORE = 2,
+ // Concatenate with next term
+ CONCATENATE = 3,
+
// Errors
ERROR_UNKNOWN = 100,
ERROR_NO_WHITESPACE_AROUND_OR = 101,
@@ -154,6 +165,7 @@ enum ActionOrError {
ERROR_EXCLUSION_PROPERTY_TOGETHER = 105,
ERROR_EXCLUSION_OR_TOGETHER = 106,
ERROR_PROPERTY_OR_TOGETHER = 107,
+ ERROR_NON_ASCII_AS_PROPERTY_NAME = 108,
};
std::string_view GetErrorMessage(ActionOrError maybe_error) {
@@ -175,6 +187,8 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
return "Exclusion and OR operators can't be used together";
case ERROR_PROPERTY_OR_TOGETHER:
return "Property restriction and OR operators can't be used together";
+ case ERROR_NON_ASCII_AS_PROPERTY_NAME:
+ return "Characters in property name must all be ASCII.";
default:
return "";
}
@@ -186,7 +200,7 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// States:
//
// READY = 0
-// PROCESSING_ALPHANUMERIC_TERM = 1
+// PROCESSING_ASCII_ALPHANUMERIC_TERM = 1
// PROCESSING_EXCLUSION = 2
// PROCESSING_EXCLUSION_TERM = 3
// PROCESSING_PROPERTY_RESTRICT = 4
@@ -194,24 +208,28 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// PROCESSING_OR = 6
// OPENING_PARENTHESES = 7
// CLOSING_PARENTHESES = 8
+// PROCESSING_NON_ASCII_ALPHANUMERIC_TERM = 9
+// PROCESSING_PROPERTY_TERM_APPENDING = 10
//
// Actions:
//
// OUTPUT = a
// KEEP = b
// IGNORE = c
+// CONCATENATE = d, concatenate the current term and the new term.
//
-// ========================================================
-// Transition Table || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
-// ===========================================================================
-// WHITESPACE || 0,c | 0,a | 0,c | 0,a | 0,a | 0,a | 0,a | 0,a | 0,a |
-// ALPHANUMERIC_TERM || 1,c | 1,a | 3,a | 1,a | 5,a | 1,a |ERROR| 1,a | 1,a |
-// LEFT_PARENTHESES || 7,c | 7,a |ERROR| 7,a |ERROR| 7,a | 7,a | 7,a | 7,a |
-// RIGHT_PARENTHESES || 8,c | 8,a | 8,c | 8,a | 8,a | 8,a | 8,c | 8,a | 8,a |
-// EXCLUSION_OPERATOR || 2,c | 0,a | 2,c | 0,a |ERROR| 0,a |ERROR| 2,a | 2,a |
-// OR_OPERATOR || 6,c |ERROR|ERROR|ERROR|ERROR|ERROR|ERROR| 7,b | 6,a |
-// COLON || 0,c | 4,b |ERROR|ERROR| 4,b | 0,a |ERROR| 0,a |ERROR|
-// OTHER || 0,c | 0,a | 0,c | 0,a | 0,a | 0,a | 0,a | 0,a | 0,a |
+// =============================================================================
+// Transition || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
+// =============================================================================
+// WHITESPACE || 0,c| 0,a| 0,c| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a|
+// ASCII_ALPHA || 1,c| 1,d| 3,a| 1,a| 5,a| 1,a|ERR | 1,a| 1,a| 1,a|10,d|
+// NONASCII_ALPHA || 9,c| 9,a| 3,a| 9,a| 5,a| 9,a|ERR | 9,a| 9,a| 9,a|10,d|
+// LEFT_PAREN || 7,c| 7,a|ERR | 7,a|ERR | 7,a| 7,a| 7,a| 7,a| 7,a| 7,a|
+// RIGHT_PAREN || 8,c| 8,a| 8,c| 8,a| 8,a| 8,a| 8,c| 8,a| 8,a| 8,a| 8,a|
+// EXCLUSION_OP || 2,c| 0,a| 2,c| 0,a|ERR | 0,a|ERR | 2,a| 2,a| 0,a| 0,a|
+// OR_OPERATOR || 6,c|ERR |ERR |ERR |ERR |ERR |ERR | 7,b| 6,a|ERR |ERR |
+// COLON || 0,c| 4,b|ERR |ERR | 4,b|10,d|ERR | 0,a|ERR |ERR |10,d|
+// OTHER || 0,c| 0,a| 0,c| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a|
//
// Each cell is a rule that consists of 4 things:
// [current state] + [next term type] -> [new state] + [action]
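+// Example: in the table above, the cell at row COLON, column 1 is "4,b", i.e.
+// PROCESSING_ASCII_ALPHANUMERIC_TERM + COLON ->
+// PROCESSING_PROPERTY_RESTRICT + KEEP: the term seen so far is kept because
+// it may turn out to be a property name.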
@@ -228,39 +246,56 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
//
// NOTE: Please update the state transition table above if this is updated.
//
-// TODO(samzheng): support syntax "-property1:term1", right now we don't allow
+// TODO(tjbarron): support syntax "-property1:term1", right now we don't allow
// exclusion and property restriction applied on the same term.
// TODO(b/141007791): figure out how we'd like to support special characters
// like "+", "&", "@", "#" in indexing and query tokenizers.
constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
/*State: Ready*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, READY, READY},
/*State: PROCESSING_ALPHANUMERIC_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
/*State: PROCESSING_EXCLUSION*/
- {READY, PROCESSING_EXCLUSION_TERM, INVALID, CLOSING_PARENTHESES,
- PROCESSING_EXCLUSION, INVALID, INVALID, READY},
+ {READY, PROCESSING_EXCLUSION_TERM, PROCESSING_EXCLUSION_TERM, INVALID,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, INVALID, INVALID, READY},
/*State: PROCESSING_EXCLUSION_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
/*State: PROCESSING_PROPERTY_RESTRICT*/
- {READY, PROCESSING_PROPERTY_TERM, INVALID, CLOSING_PARENTHESES, INVALID,
- INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
+ {READY, PROCESSING_PROPERTY_TERM, PROCESSING_PROPERTY_TERM, INVALID,
+ CLOSING_PARENTHESES, INVALID, INVALID, PROCESSING_PROPERTY_RESTRICT,
+ READY},
/*State: PROCESSING_PROPERTY_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, READY, INVALID, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_TERM_APPENDING,
+ READY},
/*State: PROCESSING_OR*/
- {READY, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID, INVALID,
- INVALID, READY},
+ {READY, INVALID, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID,
+ INVALID, INVALID, READY},
/*State: OPENING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
CLOSING_PARENTHESES, PROCESSING_EXCLUSION, OPENING_PARENTHESES, READY,
READY},
/*State: CLOSING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY}};
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY},
+ /*State: PROCESSING_NON_ASCII_ALPHANUMERIC_TERM*/
+ {READY, PROCESSING_ALPHANUMERIC_TERM,
+ PROCESSING_NON_ASCII_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ /*State: PROCESSING_PROPERTY_TERM_APPENDING*/
+ {READY, PROCESSING_PROPERTY_TERM_APPENDING,
+ PROCESSING_PROPERTY_TERM_APPENDING, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_TERM_APPENDING,
+ READY}};
// We use a 2D array to encode the action rules,
// The value of action_rules[state1][term_type1] means "what action we need to
@@ -269,62 +304,150 @@ constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
// NOTE: Please update the state transition table above if this is updated.
constexpr ActionOrError action_rules[STATE_COUNT][TYPE_COUNT] = {
/*State: Ready*/
- {IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE},
+ {IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE},
/*State: PROCESSING_ALPHANUMERIC_TERM*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
- KEEP, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, KEEP, OUTPUT},
/*State: PROCESSING_EXCLUSION*/
- {IGNORE, OUTPUT, ERROR_GROUP_AFTER_EXCLUSION, IGNORE, IGNORE,
+ {IGNORE, OUTPUT, OUTPUT, ERROR_GROUP_AFTER_EXCLUSION, IGNORE, IGNORE,
ERROR_EXCLUSION_OR_TOGETHER, ERROR_EXCLUSION_PROPERTY_TOGETHER, IGNORE},
/*State: PROCESSING_EXCLUSION_TERM*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
- ERROR_EXCLUSION_PROPERTY_TOGETHER, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, ERROR_EXCLUSION_PROPERTY_TOGETHER, OUTPUT},
/*State: PROCESSING_PROPERTY_RESTRICT*/
- {OUTPUT, OUTPUT, ERROR_GROUP_AFTER_PROPERTY_RESTRICTION, OUTPUT,
+ {OUTPUT, OUTPUT, OUTPUT, ERROR_GROUP_AFTER_PROPERTY_RESTRICTION, OUTPUT,
ERROR_EXCLUSION_PROPERTY_TOGETHER, ERROR_PROPERTY_OR_TOGETHER, KEEP,
OUTPUT},
/*State: PROCESSING_PROPERTY_TERM*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
- OUTPUT, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, CONCATENATE, OUTPUT},
/*State: PROCESSING_OR*/
- {OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT, IGNORE,
- ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR,
- ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT},
+ {OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR,
+ OUTPUT, IGNORE, ERROR_NO_WHITESPACE_AROUND_OR,
+ ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT},
/*State: OPENING_PARENTHESES*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, KEEP, OUTPUT, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, KEEP, OUTPUT, OUTPUT},
/*State: CLOSING_PARENTHESES*/
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_GROUP_AS_PROPERTY_NAME, OUTPUT},
+ /*State: PROCESSING_NON_ASCII_ALPHANUMERIC_TERM*/
{OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
- ERROR_GROUP_AS_PROPERTY_NAME, OUTPUT}};
-
-// Helper function to get the TermType of the input term.
-TermType GetTermType(std::string_view term) {
- if (term.length() == 1) {
- // Must be an ASCII char
- const char& first_term_char = term[0];
- if (first_term_char == kWhitespace) {
- return WHITESPACE;
- } else if (first_term_char == kColon) {
- return COLON;
- } else if (first_term_char == kLeftParentheses) {
- return LEFT_PARENTHESES;
- } else if (first_term_char == kRightParentheses) {
- return RIGHT_PARENTHESES;
- } else if (first_term_char == kExclusion) {
- return EXCLUSION_OPERATOR;
- }
- } else if (term.length() == 2 && term == kOrOperator) {
- return OR_OPERATOR;
+ ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NON_ASCII_AS_PROPERTY_NAME, OUTPUT},
+ /*State: PROCESSING_PROPERTY_TERM_APPENDING*/
+ {OUTPUT, CONCATENATE, CONCATENATE, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, CONCATENATE, OUTPUT}};
+
+// Determines the length of the whitespace term beginning at text[pos] and
+// returns a pair with the WHITESPACE TermType and a string_view of the
+// whitespace term.
+std::pair<TermType, std::string_view> GetWhitespaceTerm(std::string_view text,
+ size_t pos) {
+ size_t cur = pos;
+ while (cur < text.length() && text[cur] == kWhitespace) {
+ ++cur;
+ }
+ return std::make_pair(WHITESPACE, text.substr(pos, cur - pos));
+}
+
+TermType GetContentTermType(std::string_view text, size_t pos) {
+ if (i18n_utils::IsPunctuationAt(text, pos)) {
+ return OTHER;
+ } else if (i18n_utils::IsAscii(text[pos])) {
+ return ASCII_ALPHANUMERIC_TERM;
+ }
+ return NON_ASCII_ALPHANUMERIC_TERM;
+}
+
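+// Returns true for term types that may carry searchable content (and so may
+// need to be segmented), and false for operators and separators.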
+bool IsContentTermType(TermType term_type) {
+ switch (term_type) {
+ case ASCII_ALPHANUMERIC_TERM:
+ [[fallthrough]];
+ case NON_ASCII_ALPHANUMERIC_TERM:
+ [[fallthrough]];
+ case OTHER:
+ return true;
+ case WHITESPACE:
+ [[fallthrough]];
+ case LEFT_PARENTHESES:
+ [[fallthrough]];
+ case RIGHT_PARENTHESES:
+ [[fallthrough]];
+ case EXCLUSION_OPERATOR:
+ [[fallthrough]];
+ case OR_OPERATOR:
+ [[fallthrough]];
+ case COLON:
+ [[fallthrough]];
+ case TYPE_COUNT:
+ return false;
}
+}
+
+// Determines the length of the potential content term beginning at text[pos]
+// and returns a pair with the appropriate TermType and a string_view of the
+// content term.
+//
+// NOTE: The potential content term could be multiple content terms
+// (segmentation is needed to determine this) or a property restriction
+// (depending on other neighboring tokens). It could also be multiple content
+// terms surrounding an OR operator (segmentation is also needed to determine
+// this).
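+//
+// Example (illustrative): for text "foo:bar" and pos 0, the scan stops at the
+// colon and the result is (ASCII_ALPHANUMERIC_TERM, "foo").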
+std::pair<TermType, std::string_view> GetContentTerm(std::string_view text,
+ size_t pos) {
+ size_t len = 0;
// Checks the first char to see if it's an ASCII term
- if (i18n_utils::IsAscii(term[0])) {
- if (std::isalnum(term[0])) {
- return ALPHANUMERIC_TERM;
+ TermType type = GetContentTermType(text, pos);
+ for (size_t cur = pos; cur < text.length() && len == 0; ++cur) {
+ switch (text[cur]) {
+ case kLeftParentheses:
+ [[fallthrough]];
+ case kRightParentheses:
+ [[fallthrough]];
+ case kExclusion:
+ [[fallthrough]];
+ case kWhitespace:
+ [[fallthrough]];
+ case kColon:
+        // If we reach any of our special characters (parentheses, exclusion,
+        // whitespace or colon), then we've reached the end of the content
+        // term. Set len, which also ends the loop.
+ len = cur - pos;
+ break;
+ default:
+ break;
}
- return OTHER;
}
- // All non-ASCII terms are alphabetic since language segmenter already
- // filters out non-ASCII and non-alphabetic terms
- return ALPHANUMERIC_TERM;
+ if (len == 0) {
+ // If len isn't set, then we must have reached the end of the string.
+ len = text.length() - pos;
+ }
+ return std::make_pair(type, text.substr(pos, len));
+}
+
+// Determines the type and length of the term beginning at text[pos].
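+// Example (illustrative): for text "-foo", GetTerm(text, 0) returns
+// (EXCLUSION_OPERATOR, "-") and GetTerm(text, 1) returns
+// (ASCII_ALPHANUMERIC_TERM, "foo").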
+std::pair<TermType, std::string_view> GetTerm(std::string_view text,
+ size_t pos) {
+ switch (text[pos]) {
+ case kLeftParentheses:
+ return std::make_pair(LEFT_PARENTHESES, text.substr(pos, 1));
+ case kRightParentheses:
+ return std::make_pair(RIGHT_PARENTHESES, text.substr(pos, 1));
+ case kExclusion:
+ return std::make_pair(EXCLUSION_OPERATOR, text.substr(pos, 1));
+ case kWhitespace:
+ // Get length of whitespace
+ return GetWhitespaceTerm(text, pos);
+ case kColon:
+ return std::make_pair(COLON, text.substr(pos, 1));
+ case kOrOperator[0]:
+ if (text.length() >= pos + kOrOperator.length() &&
+ text.substr(pos, kOrOperator.length()) == kOrOperator) {
+ return std::make_pair(OR_OPERATOR,
+ text.substr(pos, kOrOperator.length()));
+ }
+ [[fallthrough]];
+ default:
+ return GetContentTerm(text, pos);
+ }
}
// Helper function to remove the last token if it's OR operator. This is used to
@@ -332,7 +455,7 @@ TermType GetTermType(std::string_view term) {
// and [(cat OR)]. This helps assert extra rule 3: "OR" is ignored if there's no
// valid token on its right.
void RemoveLastTokenIfOrOperator(std::vector<Token>* tokens) {
- if (!tokens->empty() && tokens->back().type == Token::QUERY_OR) {
+ if (!tokens->empty() && tokens->back().type == Token::Type::QUERY_OR) {
tokens->pop_back();
}
}
@@ -346,11 +469,11 @@ libtextclassifier3::Status OutputOrOperatorToken(std::vector<Token>* tokens) {
}
Token::Type last_token_type = tokens->back().type;
switch (last_token_type) {
- case Token::REGULAR:
- case Token::QUERY_RIGHT_PARENTHESES:
- tokens->emplace_back(Token::QUERY_OR);
+ case Token::Type::REGULAR:
+ case Token::Type::QUERY_RIGHT_PARENTHESES:
+ tokens->emplace_back(Token::Type::QUERY_OR);
break;
- case Token::QUERY_OR:
+ case Token::Type::QUERY_OR:
// Ignores "OR" because there's already an "OR", e.g. "term1 OR OR term2"
break;
default:
@@ -378,28 +501,34 @@ libtextclassifier3::Status OutputToken(State new_state,
TermType current_term_type,
std::vector<Token>* tokens) {
switch (current_term_type) {
- case ALPHANUMERIC_TERM:
+ case ASCII_ALPHANUMERIC_TERM:
+ [[fallthrough]];
+ case NON_ASCII_ALPHANUMERIC_TERM:
if (new_state == PROCESSING_PROPERTY_TERM) {
- // Asserts extra rule 1: property name must be in ASCII
- if (!i18n_utils::IsAscii(current_term[0])) {
- return absl_ports::InvalidArgumentError(
- "Characters in property name must all be ASCII.");
+ // Asserts extra rule 1: each property name in the property path is a
+ // valid term.
+ for (std::string_view property :
+ property_util::SplitPropertyPathExpr(current_term)) {
+ if (!SchemaUtil::ValidatePropertyName(property).ok()) {
+ return absl_ports::InvalidArgumentError(
+ GetErrorMessage(ERROR_NON_ASCII_AS_PROPERTY_NAME));
+ }
}
- tokens->emplace_back(Token::QUERY_PROPERTY, current_term);
+ tokens->emplace_back(Token::Type::QUERY_PROPERTY, current_term);
} else {
- tokens->emplace_back(Token::REGULAR, current_term);
+ tokens->emplace_back(Token::Type::REGULAR, current_term);
}
break;
case LEFT_PARENTHESES:
- tokens->emplace_back(Token::QUERY_LEFT_PARENTHESES);
+ tokens->emplace_back(Token::Type::QUERY_LEFT_PARENTHESES);
break;
case RIGHT_PARENTHESES:
// Ignores "OR" if it's followed by right parentheses.
RemoveLastTokenIfOrOperator(tokens);
- tokens->emplace_back(Token::QUERY_RIGHT_PARENTHESES);
+ tokens->emplace_back(Token::Type::QUERY_RIGHT_PARENTHESES);
break;
case EXCLUSION_OPERATOR:
- tokens->emplace_back(Token::QUERY_EXCLUSION);
+ tokens->emplace_back(Token::Type::QUERY_EXCLUSION);
break;
case OR_OPERATOR:
return OutputOrOperatorToken(tokens);
@@ -416,13 +545,11 @@ libtextclassifier3::Status OutputToken(State new_state,
// Returns:
// OK on success
// INVALID_ARGUMENT with error message on invalid query syntax
-libtextclassifier3::Status ProcessTerm(State* current_state,
- std::string_view* current_term,
- TermType* current_term_type,
- int* unclosed_parentheses_count,
- const std::string_view next_term,
- TermType next_term_type,
- std::vector<Token>* tokens) {
+libtextclassifier3::Status ProcessTerm(
+ State* current_state, std::string_view* current_term,
+ TermType* current_term_type, int* unclosed_parentheses_count,
+ const std::string_view next_term, TermType next_term_type,
+ const LanguageSegmenter* language_segmenter, std::vector<Token>* tokens) {
// Asserts extra rule 4: parentheses must appear in pairs.
if (next_term_type == LEFT_PARENTHESES) {
++(*unclosed_parentheses_count);
@@ -440,8 +567,23 @@ libtextclassifier3::Status ProcessTerm(State* current_state,
}
switch (action_or_error) {
case OUTPUT:
- ICING_RETURN_IF_ERROR(
- OutputToken(new_state, *current_term, *current_term_type, tokens));
+ if (*current_state == PROCESSING_PROPERTY_TERM_APPENDING) {
+      // We appended multiple terms together in case they were joined by
+      // colon connectors (e.g. "foo:bar" in "property1:foo:bar").
+      // Re-segment the appended term and output each content term.
+ ICING_ASSIGN_OR_RETURN(std::vector<std::string_view> content_terms,
+ language_segmenter->GetAllTerms(*current_term));
+ for (std::string_view term : content_terms) {
+ TermType type = GetContentTermType(term, 0);
+ if (type == OTHER) {
+ // Skip OTHER tokens here.
+ continue;
+ }
+ ICING_RETURN_IF_ERROR(OutputToken(new_state, term, type, tokens));
+ }
+ } else {
+ ICING_RETURN_IF_ERROR(
+ OutputToken(new_state, *current_term, *current_term_type, tokens));
+ }
[[fallthrough]];
case IGNORE:
*current_term = next_term;
@@ -449,6 +591,11 @@ libtextclassifier3::Status ProcessTerm(State* current_state,
break;
case KEEP:
break;
+ case CONCATENATE:
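+      // current_term and next_term are both views into the same backing query
+      // text, so extend current_term through the end of next_term (including
+      // any characters, such as a joining colon, between them).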
+ *current_term = std::string_view(
+ current_term->data(),
+ next_term.data() - current_term->data() + next_term.length());
+ break;
default:
return absl_ports::InvalidArgumentError(GetErrorMessage(ERROR_UNKNOWN));
}
@@ -463,56 +610,50 @@ libtextclassifier3::Status ProcessTerm(State* current_state,
// A list of tokens on success
// INVALID_ARGUMENT with error message on invalid query syntax
libtextclassifier3::StatusOr<std::vector<Token>> ProcessTerms(
- std::unique_ptr<LanguageSegmenter::Iterator> base_iterator) {
+ const LanguageSegmenter* language_segmenter,
+ std::vector<std::pair<TermType, std::string_view>> prescanned_terms) {
std::vector<Token> tokens;
State current_state = READY;
std::string_view current_term;
TermType current_term_type;
int unclosed_parentheses_count = 0;
- while (base_iterator->Advance()) {
- const std::string_view next_term = base_iterator->GetTerm();
- size_t colon_position = next_term.find(kColon);
- // Since colon ":" is a word connector per ICU's rule
- // (https://unicode.org/reports/tr29/#Word_Boundaries), strings like
- // "foo:bar" are returned by LanguageSegmenter as one term. Here we're
- // trying to find the first colon as it represents property restriction in
- // raw query.
- if (colon_position == std::string_view::npos) {
- // No colon found
- ICING_RETURN_IF_ERROR(ProcessTerm(&current_state, &current_term,
- &current_term_type,
- &unclosed_parentheses_count, next_term,
- GetTermType(next_term), &tokens));
- } else if (next_term.size() == 1 && next_term[0] == kColon) {
- // The whole term is a colon
+ for (int i = 0; i < prescanned_terms.size(); ++i) {
+ const std::pair<TermType, std::string_view>& prescanned_term =
+ prescanned_terms.at(i);
+ if (!IsContentTermType(prescanned_term.first)) {
+ // This can't be a property restrict. Just pass it in.
ICING_RETURN_IF_ERROR(
ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, next_term, COLON, &tokens));
+ &unclosed_parentheses_count, prescanned_term.second,
+ prescanned_term.first, language_segmenter, &tokens));
} else {
- // String before the colon is the property name
- std::string_view property_name = next_term.substr(0, colon_position);
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, property_name,
- GetTermType(property_name), &tokens));
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, std::string_view(&kColon, 1),
- COLON, &tokens));
- // String after the colon is the term that property restriction is applied
- // on.
- std::string_view property_term = next_term.substr(colon_position + 1);
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, property_term,
- GetTermType(property_term), &tokens));
+      // This is a content term, so we need to segment it.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::string_view> content_terms,
+ language_segmenter->GetAllTerms(prescanned_term.second));
+ for (std::string_view term : content_terms) {
+ TermType type = GetContentTermType(term, 0);
+ if (term == kOrOperator) {
+ // TODO(tjbarron) Decide whether we should revise this and other
+ // handled syntax. This is used to allow queries like "term1,OR,term2"
+ // to succeed. It's not clear if we should allow this or require
+ // clients to ensure that OR operators are always surrounded by
+ // whitespace.
+ // Override the type if this is actually an OR operator.
+ type = OR_OPERATOR;
+ }
+ ICING_RETURN_IF_ERROR(ProcessTerm(&current_state, &current_term,
+ &current_term_type,
+ &unclosed_parentheses_count, term,
+ type, language_segmenter, &tokens));
+ }
}
}
// Adds a fake whitespace at the end to flush the last term.
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count,
- std::string_view(&kWhitespace, 1), WHITESPACE, &tokens));
+ ICING_RETURN_IF_ERROR(ProcessTerm(
+ &current_state, &current_term, &current_term_type,
+ &unclosed_parentheses_count, std::string_view(&kWhitespace, 1),
+ WHITESPACE, language_segmenter, &tokens));
if (unclosed_parentheses_count > 0) {
return absl_ports::InvalidArgumentError("Unclosed left parentheses.");
}
@@ -531,11 +672,14 @@ class RawQueryTokenIterator : public Tokenizer::Iterator {
bool Advance() override { return ++current_ < tokens_.size(); }
- Token GetToken() const override {
- if (current_ < 0 || current_ >= tokens_.size()) {
- return Token(Token::INVALID);
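+  // Returns a vector holding the single token at the current position, or an
+  // empty vector if Advance() has not yet been called or has returned false.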
+ std::vector<Token> GetTokens() const override {
+ std::vector<Token> result;
+
+ if (current_ >= 0 && current_ < tokens_.size()) {
+ result.push_back(tokens_.at(current_));
}
- return tokens_.at(current_);
+
+ return result;
}
private:
@@ -553,10 +697,16 @@ RawQueryTokenizer::Tokenize(std::string_view text) const {
libtextclassifier3::StatusOr<std::vector<Token>> RawQueryTokenizer::TokenizeAll(
std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<LanguageSegmenter::Iterator> base_iterator,
- language_segmenter_.Segment(text));
- return ProcessTerms(std::move(base_iterator));
+ // 1. Prescan all terms in the text, to determine which ones are potentially
+ // content and which ones are not.
+ std::vector<std::pair<TermType, std::string_view>> prescanned_terms;
+ for (size_t pos = 0; pos < text.length();) {
+ std::pair<TermType, std::string_view> term_pair = GetTerm(text, pos);
+ pos += term_pair.second.length();
+ prescanned_terms.push_back(term_pair);
+ }
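+  // For example (illustrative), the text "foo:bar baz" prescans to
+  // [(ASCII_ALPHANUMERIC_TERM, "foo"), (COLON, ":"),
+  //  (ASCII_ALPHANUMERIC_TERM, "bar"), (WHITESPACE, " "),
+  //  (ASCII_ALPHANUMERIC_TERM, "baz")].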
+ // 2. Process the prescanned terms, segmenting content terms as needed.
+ return ProcessTerms(&language_segmenter_, std::move(prescanned_terms));
}
} // namespace lib
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index 351f7c1..39cc0ed 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -16,26 +16,31 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::IsEmpty;
using ::testing::HasSubstr;
class RawQueryTokenizerTest : public ::testing::Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
}
};
@@ -44,106 +49,168 @@ TEST_F(RawQueryTokenizerTest, CreationWithNullPointerShouldFail) {
tokenizer_factory::RAW_QUERY, /*lang_segmenter=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
+TEST_F(RawQueryTokenizerTest, NoTokensBeforeAdvancing) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> raw_query_tokenizer,
+ tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
+ language_segmenter.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator,
+ raw_query_tokenizer->Tokenize(kText));
+
+  // We should get no tokens if we ask for tokens before advancing.
+ EXPECT_THAT(token_iterator->GetTokens(), IsEmpty());
+}
TEST_F(RawQueryTokenizerTest, Simple) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("Hello World!"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
- EqualsToken(Token::REGULAR, "World"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("Hello World!"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "World"))));
+
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("hElLo WORLD"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "hElLo"),
+ EqualsToken(Token::Type::REGULAR, "WORLD"))));
}
-TEST_F(RawQueryTokenizerTest, Parentheses) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(RawQueryTokenizerTest, Emoji) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("()"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
-
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( )"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("😊 Hello! Goodbye?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "😊"),
+ EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "Goodbye"))));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 term2)"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("Hello😊 ! Goodbye?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"),
+ EqualsToken(Token::Type::REGULAR, "😊"),
+ EqualsToken(Token::Type::REGULAR, "Goodbye"))));
+}
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1 term2) (term3 term4))"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term3"),
- EqualsToken(Token::REGULAR, "term4"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
-
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1(term2)"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+TEST_F(RawQueryTokenizerTest, Parentheses) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Tokenizer> raw_query_tokenizer,
+ tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
+ language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Token> query_tokens,
+ raw_query_tokenizer->TokenizeAll("()"));
EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("(term1)term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"))));
+ query_tokens,
+ ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)(term2)"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ ICING_ASSERT_OK_AND_ASSIGN(query_tokens,
+ raw_query_tokenizer->TokenizeAll("( )"));
+ EXPECT_THAT(
+ query_tokens,
+ ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")));
+ ICING_ASSERT_OK_AND_ASSIGN(query_tokens,
+ raw_query_tokenizer->TokenizeAll("(term1 term2)"));
EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("(term1)-term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "term2"))));
+ query_tokens,
+ ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ query_tokens,
+ raw_query_tokenizer->TokenizeAll("((term1 term2) (term3 term4))"));
EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("(term1)OR term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::REGULAR, "term2"))));
+ query_tokens,
+ ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term3"),
+ EqualsToken(Token::Type::REGULAR, "term4"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")));
+
+ ICING_ASSERT_OK_AND_ASSIGN(query_tokens,
+ raw_query_tokenizer->TokenizeAll("term1(term2)"));
+ EXPECT_THAT(
+ query_tokens,
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")));
+
+ ICING_ASSERT_OK_AND_ASSIGN(query_tokens,
+ raw_query_tokenizer->TokenizeAll("(term1)term2"));
+ EXPECT_THAT(query_tokens,
+ ElementsAre(EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2")));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)(term2)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)-term2"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR term2"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR(term2)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1):term2"),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
@@ -158,52 +225,59 @@ TEST_F(RawQueryTokenizerTest, Parentheses) {
HasSubstr("Too many right parentheses")));
}
-TEST_F(RawQueryTokenizerTest, Exclustion) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+TEST_F(RawQueryTokenizerTest, Exclusion) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(-term1)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// Exclusion operator is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("- term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("- term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
// Exclusion operator is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1- term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "term2"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("term1- term2"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
// Exclusion operator is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 -)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// First exclusion operator is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("--term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("--term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
// First "-" is exclusion operator, second is not and will be discarded.
// In other words, exclusion only applies to the term right after it.
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1-term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "term2"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-term1-term2"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-(term1)"),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
@@ -226,74 +300,94 @@ TEST_F(RawQueryTokenizerTest, Exclustion) {
}
TEST_F(RawQueryTokenizerTest, PropertyRestriction) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1:term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(property1:term1)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// Colon is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll(":term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll(":term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
// Colon is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(:term1)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// Colon is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1:"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("term1:"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
// property name can be a path
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("email.title:hello"),
- IsOkAndHolds(
- ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "email.title"),
- EqualsToken(Token::REGULAR, "hello"))));
-
- // The first colon ":" triggers property restriction, the second colon is used
- // as a word connector per ICU's rule
- // (https://unicode.org/reports/tr29/#Word_Boundaries).
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property:foo:bar"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property"),
- EqualsToken(Token::REGULAR, "foo:bar"))));
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "email.title"),
+ EqualsToken(Token::Type::REGULAR, "hello"))));
+
+  // The first colon ":" triggers property restriction. Before ICU 72, ':' was
+  // considered a word connector, so the second ':' is interpreted as a
+  // connector. In ICU 72 and above, ':' is no longer considered a connector.
+ // TODO(b/254874614): Handle colon word breaks in ICU 72+
+ if (GetIcuTokenizationVersion() >= 72) {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property:foo:bar"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property"),
+ EqualsToken(Token::Type::REGULAR, "foo"),
+ EqualsToken(Token::Type::REGULAR, "bar"))));
+ } else {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property:foo:bar"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property"),
+ EqualsToken(Token::Type::REGULAR, "foo:bar"))));
+ }
// Property restriction only applies to the term right after it.
// Note: "term1:term2" is not a term but 2 terms because word connectors
// don't apply to numbers and alphabets.
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1:term1:term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "term2"))));
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1:term2"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1:term1-"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:今天:天气"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "今天"),
+ EqualsToken(Token::Type::REGULAR, "天气"))));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1-"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
  // Multiple consecutive colons will still be recognized as a property
  // restriction operator
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1::term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1::term1"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("property1:(term1)"),
@@ -314,112 +408,118 @@ TEST_F(RawQueryTokenizerTest, PropertyRestriction) {
}
TEST_F(RawQueryTokenizerTest, OR) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::REGULAR, "term2"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("term1 OR term2"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
  // Two consecutive "OR"s are treated as one
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR OR term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::REGULAR, "term2"))));
-
EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("(term1) OR term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::REGULAR, "term2"))));
+ raw_query_tokenizer->TokenizeAll("term1 OR OR term2"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1) OR term2"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR (term2)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1) OR (term2))"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// Only "OR" (all in uppercase) is the operator
EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("term1 or term2 Or term3 oR term4"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "or"),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::REGULAR, "Or"),
- EqualsToken(Token::REGULAR, "term3"),
- EqualsToken(Token::REGULAR, "oR"),
- EqualsToken(Token::REGULAR, "term4"))));
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "or"),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::REGULAR, "Or"),
+ EqualsToken(Token::Type::REGULAR, "term3"),
+ EqualsToken(Token::Type::REGULAR, "oR"),
+ EqualsToken(Token::Type::REGULAR, "term4"))));
// "OR" is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("OR term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("OR term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
// "OR" is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("term1 OR"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
// "OR" is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(OR term1)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// "OR" is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR term1)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// "OR" is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// "OR" is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR )"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// "OR" is ignored
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR )"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR(term2)"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term2"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("term1 OR-term2"),
@@ -435,34 +535,56 @@ TEST_F(RawQueryTokenizerTest, OR) {
// CJKT are treated the same way by language segmenter and raw tokenizer, so
// here we test Chinese and Japanese to represent CJKT.
TEST_F(RawQueryTokenizerTest, CJKT) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
// Exclusion only applies to the term right after it.
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-今天天气很好"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "今天"),
- EqualsToken(Token::REGULAR, "天气"),
- EqualsToken(Token::REGULAR, "很好"))));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-今天天气很好"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "今天"),
+ EqualsToken(Token::Type::REGULAR, "天气"),
+ EqualsToken(Token::Type::REGULAR, "很"),
+ EqualsToken(Token::Type::REGULAR, "好"))));
+ } else {
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-今天天气很好"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "今天"),
+ EqualsToken(Token::Type::REGULAR, "天气"),
+ EqualsToken(Token::Type::REGULAR, "很好"))));
+ }
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1:你好"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "你好"))));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "你"),
+ EqualsToken(Token::Type::REGULAR, "好"))));
+ } else {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "你好"))));
+ }
EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("标题:你好"),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("Characters in property name must all be ASCII")));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("cat OR ねこ"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "cat"),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::REGULAR, "ねこ"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("cat OR ねこ"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "cat"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::REGULAR, "ねこ"))));
EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("cat ORねこ"),
@@ -488,73 +610,104 @@ TEST_F(RawQueryTokenizerTest, CJKT) {
// The raw tokenizer classifies all characters it doesn't recognize as the
// OTHER type, so we can choose comma "," to represent all OTHER characters.
TEST_F(RawQueryTokenizerTest, OtherChars) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
// Comma is ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll(",term1, ,"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll(",term1, ,"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(,term1),"),
IsOkAndHolds(ElementsAre(
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
// Exclusion operator and comma are ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-,term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-,term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"))));
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1,"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "term1"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-term1,"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
// Colon and comma are ignored
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:,term1"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "property1"),
- EqualsToken(Token::REGULAR, "term1"))));
-
EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1:term1,term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::REGULAR, "term2"))));
+ raw_query_tokenizer->TokenizeAll("property1:,term1"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"))));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1,term2"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
   // This is a special case for OR: unknown chars before and after OR are
   // treated the same as whitespaces.
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1,OR,term2"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
- EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::REGULAR, "term2"))));
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("term1,OR,term2"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::REGULAR, "term2"))));
}
TEST_F(RawQueryTokenizerTest, Mix) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Tokenizer> raw_query_tokenizer,
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll(
- "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "こんにちは"),
- EqualsToken(Token::REGULAR, "good"),
- EqualsToken(Token::REGULAR, "afternoon"),
- EqualsToken(Token::QUERY_PROPERTY, "title"),
- EqualsToken(Token::REGULAR, "今天"), EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "ใน"), EqualsToken(Token::REGULAR, "วัน"),
- EqualsToken(Token::REGULAR, "นี้"),
- EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "B12"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll(
+ "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::REGULAR, "こんにちは"),
+ EqualsToken(Token::Type::REGULAR, "good"),
+ EqualsToken(Token::Type::REGULAR, "afternoon"),
+ EqualsToken(Token::Type::QUERY_PROPERTY, "title"),
+ EqualsToken(Token::Type::REGULAR, "今天"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "ใน"),
+ EqualsToken(Token::Type::REGULAR, "วันนี้"),
+ EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "B12"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))));
+ } else {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<Token> tokens,
+ raw_query_tokenizer->TokenizeAll(
+ "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"));
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsToken(Token::Type::REGULAR, "こんにちは"),
+ EqualsToken(Token::Type::REGULAR, "good"),
+ EqualsToken(Token::Type::REGULAR, "afternoon"),
+ EqualsToken(Token::Type::QUERY_PROPERTY, "title"),
+ EqualsToken(Token::Type::REGULAR, "今天"),
+ EqualsToken(Token::Type::QUERY_OR, ""),
+ EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::Type::REGULAR, "ใน"),
+ EqualsToken(Token::Type::REGULAR, "วัน"),
+ EqualsToken(Token::Type::REGULAR, "นี้"),
+ EqualsToken(Token::Type::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::Type::REGULAR, "B12"),
+ EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")));
+ }
}
} // namespace
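These expectations are built on the EqualsToken matcher from icing/testing/common-matchers.h, which this diff does not show. A plausible sketch of its shape, assuming Token exposes type and text members (the real definition may differ):

    // Hypothetical sketch of the EqualsToken matcher used in the tests above;
    // the real one lives in icing/testing/common-matchers.h.
    MATCHER_P2(EqualsToken, type, text, "") {
      return arg.type == type && arg.text == text;
    }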
diff --git a/icing/jni/reverse-jni-break-iterator.cc b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc
index 1a8a799..dbd7f5a 100644
--- a/icing/jni/reverse-jni-break-iterator.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc
@@ -12,20 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/jni/reverse-jni-break-iterator.h"
+#include "icing/tokenization/reverse_jni/reverse-jni-break-iterator.h"
#include <jni.h>
-#include <math.h>
#include <cassert>
#include <cctype>
+#include <cmath>
#include <map>
-#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/text_classifier/lib3/utils/java/jni-base.h"
#include "icing/text_classifier/lib3/utils/java/jni-helper.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/jni/jni-cache.h"
#include "icing/util/status-macros.h"
namespace icing {
diff --git a/icing/jni/reverse-jni-break-iterator.h b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h
index c1f05f4..537666c 100644
--- a/icing/jni/reverse-jni-break-iterator.h
+++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h
@@ -12,16 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
-#define ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
+#ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
+#define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
#include <jni.h>
#include <queue>
#include <string>
-#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/java/jni-base.h"
+#include "icing/jni/jni-cache.h"
namespace icing {
namespace lib {
@@ -121,4 +121,4 @@ class ReverseJniBreakIterator {
} // namespace lib
} // namespace icing
-#endif // ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
+#endif // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc
index f79bc68..a251f90 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc
@@ -13,9 +13,11 @@
// limitations under the License.
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/jni/jni-cache.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h"
#include "icing/util/logging.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -32,7 +34,7 @@ constexpr std::string_view kLocaleAmericanEnglishComputer = "en_US_POSIX";
// A LanguageSegmenter on success
// INVALID_ARGUMENT if locale string is invalid
//
-// TODO(samzheng): Figure out if we want to verify locale strings and notify
+// TODO(b/156383798): Figure out if we want to verify locale strings and notify
// users. Right now illegal locale strings will be ignored by ICU. ICU
// components will be created with its default locale.
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc
new file mode 100644
index 0000000..5f5202c
--- /dev/null
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc
@@ -0,0 +1,37 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "icing/testing/logging-event-listener.h"
+
+// Global variable used so that the test implementation can access the JNIEnv.
+JNIEnv* g_jenv = nullptr;
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_icing_jni_ReverseJniLanguageSegmenterJniTest_testsMain(JNIEnv* env,
+ jclass ignored) {
+ g_jenv = env;
+
+ std::vector<char*> my_argv;
+ char arg[] = "jni-test-lib";
+ my_argv.push_back(arg);
+ int argc = 1;
+ char** argv = &(my_argv[0]);
+ testing::InitGoogleTest(&argc, argv);
+ testing::UnitTest::GetInstance()->listeners().Append(
+ new icing::lib::LoggingEventListener());
+ return RUN_ALL_TESTS() == 0;
+}
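The helper that consumes g_jenv is GetTestJniCache() from icing/testing/jni-test-helpers.h, which this diff does not include. A minimal sketch of its plausible shape, mirroring the initialization that the fixture header deleted below performed inline:

    #include <jni.h>

    #include <memory>

    #include "icing/jni/jni-cache.h"

    extern JNIEnv* g_jenv;  // Set by the JNI test entry point above.

    // Hypothetical sketch; the real helper lives in jni-test-helpers.h.
    std::unique_ptr<const icing::lib::JniCache> GetTestJniCache() {
      return icing::lib::JniCache::Create(g_jenv).ValueOrDie();
    }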
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h
deleted file mode 100644
index 64b68ec..0000000
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_
-#define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_
-
-#include <jni.h>
-
-#include "icing/jni/jni-cache.h"
-#include "gtest/gtest.h"
-
-extern JNIEnv* g_jenv;
-
-namespace icing {
-namespace lib {
-
-namespace test_internal {
-
-class ReverseJniLanguageSegmenterTest
- : public testing::TestWithParam<const char*> {
- protected:
- ReverseJniLanguageSegmenterTest()
- : jni_cache_(std::move(JniCache::Create(g_jenv)).ValueOrDie()) {}
-
- static std::string GetLocale() { return GetParam(); }
-
- std::unique_ptr<JniCache> jni_cache_;
-};
-
-} // namespace test_internal
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
index 2256022..bd80718 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
@@ -19,169 +19,18 @@
#include <string>
#include <string_view>
-#include "icing/jni/reverse-jni-break-iterator.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/reverse_jni/reverse-jni-break-iterator.h"
+#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-namespace {
-
-// Returns the lead byte of the UTF-8 character that includes the byte at
-// current_byte_index within it.
-int GetUTF8StartPosition(std::string_view text, int current_byte_index) {
- while (!i18n_utils::IsLeadUtf8Byte(text[current_byte_index])) {
- --current_byte_index;
- }
- return current_byte_index;
-}
-
-class CharacterIterator {
- public:
- explicit CharacterIterator(std::string_view text)
- : CharacterIterator(text, 0, 0) {}
- CharacterIterator(std::string_view text, int utf8_index, int utf16_index)
- : text_(text), utf8_index_(utf8_index), utf16_index_(utf16_index) {}
-
- // Moves from current position to the character that includes the specified
- // UTF-8 index.
- // REQUIRES: desired_utf8_index <= text_.length()
- // desired_utf8_index is allowed to point one index past the end, but no
- // further.
- bool AdvanceToUtf8(int desired_utf8_index) {
- if (desired_utf8_index > text_.length()) {
- // Enforce the requirement.
- return false;
- }
- // Need to work forwards.
- while (utf8_index_ < desired_utf8_index) {
- UChar32 uchar32 =
- i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- // Unable to retrieve a valid UTF-32 character at the previous position.
- return false;
- }
- int utf8_length = i18n_utils::GetUtf8Length(uchar32);
- if (utf8_index_ + utf8_length > desired_utf8_index) {
- // Ah! Don't go too far!
- break;
- }
- utf8_index_ += utf8_length;
- utf16_index_ += i18n_utils::GetUtf16Length(uchar32);
- }
- return true;
- }
-
- // Moves from current position to the character that includes the specified
- // UTF-8 index.
- // REQUIRES: 0 <= desired_utf8_index
- bool RewindToUtf8(int desired_utf8_index) {
- if (desired_utf8_index < 0) {
- // Enforce the requirement.
- return false;
- }
- // Need to work backwards.
- while (utf8_index_ > desired_utf8_index) {
- --utf8_index_;
- utf8_index_ = GetUTF8StartPosition(text_, utf8_index_);
- if (utf8_index_ < 0) {
- // Somehow, there wasn't a single UTF-8 lead byte at
- // requested_byte_index or an earlier byte.
- return false;
- }
- // We've found the start of a unicode char!
- UChar32 uchar32 =
- i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- // Unable to retrieve a valid UTF-32 character at the previous position.
- return false;
- }
- utf16_index_ -= i18n_utils::GetUtf16Length(uchar32);
- }
- return true;
- }
-
- // Advances current position to desired_utf16_index.
- // REQUIRES: desired_utf16_index <= text_.utf16_length()
- // desired_utf16_index is allowed to point one index past the end, but no
- // further.
- bool AdvanceToUtf16(int desired_utf16_index) {
- while (utf16_index_ < desired_utf16_index) {
- UChar32 uchar32 =
- i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- // Unable to retrieve a valid UTF-32 character at the previous position.
- return false;
- }
- int utf16_length = i18n_utils::GetUtf16Length(uchar32);
- if (utf16_index_ + utf16_length > desired_utf16_index) {
- // Ah! Don't go too far!
- break;
- }
- int utf8_length = i18n_utils::GetUtf8Length(uchar32);
- if (utf8_index_ + utf8_length > text_.length()) {
- // Enforce the requirement.
- return false;
- }
- utf8_index_ += utf8_length;
- utf16_index_ += utf16_length;
- }
- return true;
- }
-
- // Rewinds current position to desired_utf16_index.
- // REQUIRES: 0 <= desired_utf16_index
- bool RewindToUtf16(int desired_utf16_index) {
- if (desired_utf16_index < 0) {
- return false;
- }
- while (utf16_index_ > desired_utf16_index) {
- --utf8_index_;
- utf8_index_ = GetUTF8StartPosition(text_, utf8_index_);
- // We've found the start of a unicode char!
- UChar32 uchar32 =
- i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- // Unable to retrieve a valid UTF-32 character at the previous position.
- return false;
- }
- utf16_index_ -= i18n_utils::GetUtf16Length(uchar32);
- }
- return true;
- }
-
- bool IsValidCharacter() const {
- // Rule 1: all ASCII terms will be returned.
- // We know it's a ASCII term by checking the first char.
- if (i18n_utils::IsAscii(text_[utf8_index_])) {
- return true;
- }
-
- // Rule 2: for non-ASCII terms, only the alphabetic terms are returned.
- // We know it's an alphabetic term by checking the first unicode character.
- if (i18n_utils::IsAlphabeticAt(text_, utf8_index_)) {
- return true;
- }
-
- return false;
- }
-
- int utf8_index() const { return utf8_index_; }
- int utf16_index() const { return utf16_index_; }
-
- private:
- std::string_view text_;
- int utf8_index_;
- int utf16_index_;
-};
-
-} // namespace
-
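The deleted GetUTF8StartPosition above walks backwards until i18n_utils::IsLeadUtf8Byte reports a lead byte. The check itself is plain UTF-8, sketched here on the assumption that Icing's helper matches the spec: continuation bytes always carry the bit pattern 10xxxxxx, so any other top-two-bit pattern begins a code point. In "你" (E4 BD A0), for instance, rewinding from byte index 2 stops at index 0, the E4 lead byte.

    // Sketch of the standard UTF-8 lead-byte test (assumed to match
    // i18n_utils::IsLeadUtf8Byte): continuation bytes are 0b10xxxxxx.
    inline bool IsLeadUtf8Byte(unsigned char byte) {
      return (byte & 0xC0) != 0x80;  // anything but a continuation byte
    }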
class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
public:
explicit ReverseJniLanguageSegmenterIterator(
@@ -195,16 +44,16 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// Advances to the next term. Returns false if it has reached the end.
bool Advance() override {
// Prerequisite check
- if (term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone) {
+ if (IsDone()) {
return false;
}
if (term_end_exclusive_.utf16_index() == 0) {
int first = break_iterator_->First();
- if (!term_start_.AdvanceToUtf16(first)) {
- // First is guaranteed to succeed and return a position within bonds. So
- // the only possible failure could be an invalid sequence. Mark as DONE
- // and return.
+ if (!term_start_.MoveToUtf16(first)) {
+      // First is guaranteed to succeed and return a position within bounds.
+      // So the only possible failure could be an invalid sequence. Mark as
+      // DONE and return.
MarkAsDone();
return false;
}
@@ -218,7 +67,7 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
MarkAsDone();
return false;
}
- if (!term_end_exclusive_.AdvanceToUtf16(next_utf16_index_exclusive)) {
+ if (!term_end_exclusive_.MoveToUtf16(next_utf16_index_exclusive)) {
      // next_utf16_index_exclusive is guaranteed to be within bounds thanks to
// the check for kDone above. So the only possible failure could be an
// invalid sequence. Mark as DONE and return.
@@ -226,18 +75,15 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
return false;
}
- // Check if the current term is valid. We consider any term valid if its
- // first character is valid. If it's not valid, then we need to advance to
- // the next term.
- if (term_start_.IsValidCharacter()) {
- return true;
- }
- return Advance();
+ return true;
}
// Returns the current term. It can be called only when Advance() returns
// true.
std::string_view GetTerm() const override {
+ if (IsDone()) {
+ return text_.substr(0, 0);
+ }
int term_length =
term_end_exclusive_.utf8_index() - term_start_.utf8_index();
if (term_length > 0 && std::isspace(text_[term_start_.utf8_index()])) {
@@ -247,6 +93,16 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
return text_.substr(term_start_.utf8_index(), term_length);
}
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTermStart()
+ override {
+ return term_start_;
+ }
+
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTermEndExclusive()
+ override {
+ return term_end_exclusive_;
+ }
+
// Resets the iterator to point to the first term that starts after offset.
// GetTerm will now return that term.
//
@@ -258,15 +114,14 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// INVALID_ARGUMENT if offset is out of bounds for the provided text.
// ABORTED if an invalid unicode character is encountered while
// traversing the text.
- libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter(
+ libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfterUtf32(
int32_t offset) override {
- if (offset < 0 || offset >= text_.length()) {
- return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
- "Illegal offset provided! Offset %d is not within bounds of string "
- "of length %zu",
- offset, text_.length()));
+ if (offset < 0) {
+      // Very simple: the first term starting after a negative offset is just
+      // the first term. So reset to the start.
+ return ResetToStartUtf32();
}
- if (term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone) {
+ if (IsDone()) {
// We're done. Need to start from the beginning if we're going to reset
// properly.
term_start_ = CharacterIterator(text_);
@@ -274,43 +129,48 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
}
// 1. Find the unicode character that contains the byte at offset.
- CharacterIterator offset_iterator = term_end_exclusive_;
- bool success = (offset > offset_iterator.utf8_index())
- ? offset_iterator.AdvanceToUtf8(offset)
- : offset_iterator.RewindToUtf8(offset);
- if (!success) {
- // Offset is guaranteed to be within bounds thanks to the check above. So
- // the only possible failure could be an invalid sequence. Mark as DONE
- // and return.
- MarkAsDone();
- return absl_ports::AbortedError("Encountered invalid UTF sequence!");
+ CharacterIterator offset_iterator = (offset < term_start_.utf32_index())
+ ? term_start_
+ : term_end_exclusive_;
+ if (!offset_iterator.MoveToUtf32(offset)) {
+ if (offset_iterator.utf8_index() != text_.length()) {
+ // We returned false for some reason other than hitting the end. This is
+ // a real error. Just return.
+ MarkAsDone();
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ }
+ // Check to see if offset is past the end of the text. If it is, then
+ // there's no term starting after it. Return an invalid argument.
+ if (offset_iterator.utf8_index() == text_.length()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Illegal offset provided! Offset utf-32:%d, utf-8:%d is not within "
+ "bounds of string of length %zu",
+ offset_iterator.utf32_index(), offset_iterator.utf8_index(),
+ text_.length()));
}
// 2. We've got the unicode character containing byte offset. Now, we need
// to point to the segment that starts after this character.
int following_utf16_index =
break_iterator_->Following(offset_iterator.utf16_index());
- if (following_utf16_index == ReverseJniBreakIterator::kDone) {
+ if (following_utf16_index == ReverseJniBreakIterator::kDone ||
+ !offset_iterator.MoveToUtf16(following_utf16_index)) {
MarkAsDone();
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"No segments begin after provided offset %d.", offset));
}
- if (!offset_iterator.AdvanceToUtf16(following_utf16_index)) {
- // following_utf16_index is guaranteed to be within bonds thanks to the
- // check for kDone above. So the only possible failure could be an invalid
- // sequence. Mark as DONE and return.
- MarkAsDone();
- return absl_ports::AbortedError("Encountered invalid UTF sequence!");
- }
term_end_exclusive_ = offset_iterator;
- // 3. The term_end_exclusive_ points to the term that we want to return. We
- // need to Advance so that term_start_ will now point to this term.
+ // 3. The term_end_exclusive_ points to the start of the term that we want
+ // to return. We need to Advance so that term_start_ will now point to this
+ // term.
if (!Advance()) {
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"No segments begin after provided offset %d.", offset));
}
- return term_start_.utf8_index();
+ return term_start_.utf32_index();
}
// Resets the iterator to point to the first term that ends before offset.
@@ -324,52 +184,48 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// INVALID_ARGUMENT if offset is out of bounds for the provided text.
// ABORTED if an invalid unicode character is encountered while
// traversing the text.
- libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore(
+ libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBeforeUtf32(
int32_t offset) override {
- if (offset < 0 || offset >= text_.length()) {
+ if (offset < 0) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Illegal offset provided! Offset %d is not within bounds of string "
"of length %zu",
offset, text_.length()));
}
- if (term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone) {
+ if (IsDone()) {
// We're done. Need to start from the beginning if we're going to reset
// properly.
term_start_ = CharacterIterator(text_);
term_end_exclusive_ = CharacterIterator(text_);
}
- // 1. Find the unicode character that contains the byte at offset.
- CharacterIterator offset_iterator = term_end_exclusive_;
- bool success = (offset > offset_iterator.utf8_index())
- ? offset_iterator.AdvanceToUtf8(offset)
- : offset_iterator.RewindToUtf8(offset);
- if (!success) {
- // Offset is guaranteed to be within bounds thanks to the check above. So
- // the only possible failure could be an invalid sequence. Mark as DONE
- // and return.
- MarkAsDone();
- return absl_ports::AbortedError(
- "Could not retrieve valid utf8 character!");
+ CharacterIterator offset_iterator = (offset < term_start_.utf32_index())
+ ? term_start_
+ : term_end_exclusive_;
+ if (!offset_iterator.MoveToUtf32(offset)) {
+      // MoveToUtf32 failed. If it is a real error, mark as DONE.
+ if (offset_iterator.utf8_index() != text_.length()) {
+ // We returned false for some reason other than hitting the end. This is
+ // a real error. Just return.
+ MarkAsDone();
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+      // If it returned false because we hit the end, then that's fine. We'll
+      // just treat it as if the request was for the end.
}
// 2. We've got the unicode character containing byte offset. Now, we need
- // to point to the segment that starts before this character.
+ // to point to the segment that ends before this character.
int starting_utf16_index =
break_iterator_->Preceding(offset_iterator.utf16_index());
- if (starting_utf16_index == ReverseJniBreakIterator::kDone) {
+ if (starting_utf16_index == ReverseJniBreakIterator::kDone ||
+ !offset_iterator.MoveToUtf16(starting_utf16_index)) {
// Rewind the end indices.
MarkAsDone();
return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
"No segments end before provided offset %d.", offset));
}
- if (!offset_iterator.RewindToUtf16(starting_utf16_index)) {
- // starting_utf16_index is guaranteed to be within bonds thanks to the
- // check for kDone above. So the only possible failure could be an invalid
- // sequence. Mark as DONE and return.
- MarkAsDone();
- return absl_ports::AbortedError("Encountered invalid UTF sequence!");
- }
term_start_ = offset_iterator;
// 3. We've correctly set the start index and the iterator currently points
@@ -377,25 +233,25 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// advance the iterator to that position.
int end_utf16_index = break_iterator_->Next();
term_end_exclusive_ = term_start_;
- term_end_exclusive_.AdvanceToUtf16(end_utf16_index);
+ term_end_exclusive_.MoveToUtf16(end_utf16_index);
// 4. The start and end indices point to a segment, but we need to ensure
// that this segment is 1) valid and 2) ends before offset. Otherwise, we'll
// need a segment prior to this one.
- if (term_end_exclusive_.utf8_index() > offset ||
- !term_start_.IsValidCharacter()) {
- return ResetToTermEndingBefore(term_start_.utf8_index());
+ if (term_end_exclusive_.utf32_index() > offset) {
+ return ResetToTermEndingBeforeUtf32(term_start_.utf32_index());
}
- return term_start_.utf8_index();
+ return term_start_.utf32_index();
}
- libtextclassifier3::StatusOr<int32_t> ResetToStart() override {
+ libtextclassifier3::StatusOr<int32_t> ResetToStartUtf32() override {
term_start_ = CharacterIterator(text_);
term_end_exclusive_ = CharacterIterator(text_);
if (!Advance()) {
- return absl_ports::NotFoundError("");
+ return absl_ports::NotFoundError(
+ "Unable to find any valid terms in text.");
}
- return term_start_.utf8_index();
+ return term_start_.utf32_index();
}
private:
@@ -407,11 +263,19 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// break_iterator_ may be in any state.
void MarkAsDone() {
term_start_ =
- CharacterIterator(text_, /*utf8_index=*/0,
- /*utf16_index=*/ReverseJniBreakIterator::kDone);
+ CharacterIterator(text_, /*utf8_index=*/ReverseJniBreakIterator::kDone,
+ /*utf16_index=*/ReverseJniBreakIterator::kDone,
+ /*utf32_index=*/ReverseJniBreakIterator::kDone);
term_end_exclusive_ =
- CharacterIterator(text_, /*utf8_index=*/0,
- /*utf16_index=*/ReverseJniBreakIterator::kDone);
+ CharacterIterator(text_, /*utf8_index=*/ReverseJniBreakIterator::kDone,
+ /*utf16_index=*/ReverseJniBreakIterator::kDone,
+ /*utf32_index=*/ReverseJniBreakIterator::kDone);
+ }
+ bool IsDone() const {
+    // We could just as easily check the other utf indices or the values in
+    // term_start_. There's no particular reason to choose any one of them,
+    // since they should all hold kDone.
+ return term_end_exclusive_.utf16_index() == ReverseJniBreakIterator::kDone;
}
// All of ReverseJniBreakIterator's functions return UTF-16 boundaries. So
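Taken together, the Utf32 reset methods let a caller walk every term without invoking Advance() directly, which is exactly what the rewritten tests below exercise. A minimal usage sketch, where Process() is a hypothetical consumer and all offsets are UTF-32 code-point indices rather than bytes:

    // Resetting after the current term's start yields the very next term, so
    // this loop visits each term exactly once.
    auto pos_or = itr->ResetToTermStartingAfterUtf32(-1);  // first term
    while (pos_or.ok()) {
      Process(itr->GetTerm());
      pos_or = itr->ResetToTermStartingAfterUtf32(pos_or.ValueOrDie());
    }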
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h
index f06dac9..29df4ee 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h
@@ -21,8 +21,8 @@
#include <string_view>
#include <vector>
-#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/jni/jni-cache.h"
#include "icing/tokenization/language-segmenter.h"
namespace icing {
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
index a01d944..47a01fe 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h"
+#include <jni.h>
#include <memory>
#include <string_view>
@@ -21,10 +21,13 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/jni/jni-cache.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-i18n-test-utils.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/character-iterator.h"
#include "unicode/uloc.h"
namespace icing {
@@ -54,72 +57,72 @@ std::vector<std::string_view> GetAllTermsAdvance(
}
// Returns a vector containing all terms retrieved by calling ResetAfter with
-// the current position to simulate Advancing on the iterator.
-std::vector<std::string_view> GetAllTermsResetAfter(
+// the UTF-32 position of the current term start to simulate Advancing on the
+// iterator.
+std::vector<std::string_view> GetAllTermsResetAfterUtf32(
LanguageSegmenter::Iterator* itr) {
std::vector<std::string_view> terms;
- if (!itr->ResetToStart().ok()) {
- return terms;
- }
- terms.push_back(itr->GetTerm());
- const char* text_begin = itr->GetTerm().data();
- // Calling ResetToTermStartingAfter with the current position should get the
- // very next term in the sequence.
- for (int current_pos = 0; itr->ResetToTermStartingAfter(current_pos).ok();
- current_pos = itr->GetTerm().data() - text_begin) {
+ // Calling ResetToTermStartingAfterUtf32 with -1 should get the first term in
+ // the sequence.
+ bool is_ok = itr->ResetToTermStartingAfterUtf32(-1).ok();
+ while (is_ok) {
terms.push_back(itr->GetTerm());
+ // Calling ResetToTermStartingAfterUtf32 with the current position should
+ // get the very next term in the sequence.
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie();
+ is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok();
}
return terms;
}
// Returns a vector containing all terms retrieved by alternating calls to
-// Advance and calls to ResetAfter with the current position to simulate
-// Advancing.
-std::vector<std::string_view> GetAllTermsAdvanceAndResetAfter(
+// Advance and calls to ResetAfter with the UTF-32 position of the current term
+// start to simulate Advancing.
+std::vector<std::string_view> GetAllTermsAdvanceAndResetAfterUtf32(
LanguageSegmenter::Iterator* itr) {
- const char* text_begin = itr->GetTerm().data();
std::vector<std::string_view> terms;
-
- bool is_ok = true;
- int current_pos = 0;
+ bool is_ok = itr->Advance();
while (is_ok) {
+ terms.push_back(itr->GetTerm());
// Alternate between using Advance and ResetToTermAfter.
if (terms.size() % 2 == 0) {
is_ok = itr->Advance();
} else {
- // Calling ResetToTermStartingAfter with the current position should get
- // the very next term in the sequence.
- current_pos = itr->GetTerm().data() - text_begin;
- is_ok = itr->ResetToTermStartingAfter(current_pos).ok();
- }
- if (is_ok) {
- terms.push_back(itr->GetTerm());
+ // Calling ResetToTermStartingAfterUtf32 with the current position should
+ // get the very next term in the sequence.
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie();
+ is_ok = itr->ResetToTermStartingAfterUtf32(char_itr.utf32_index()).ok();
}
}
return terms;
}
// Returns a vector containing all terms retrieved by calling ResetBefore with
-// the current position, starting at the end of the text. This vector should be
-// in reverse order of GetAllTerms and missing the last term.
-std::vector<std::string_view> GetAllTermsResetBefore(
+// the UTF-32 position of the current term start, starting at the end of the
+// text. This vector should be in reverse order of GetAllTerms and missing the
+// last term.
+std::vector<std::string_view> GetAllTermsResetBeforeUtf32(
LanguageSegmenter::Iterator* itr) {
- const char* text_begin = itr->GetTerm().data();
- int last_pos = 0;
- while (itr->Advance()) {
- last_pos = itr->GetTerm().data() - text_begin;
- }
std::vector<std::string_view> terms;
- // Calling ResetToTermEndingBefore with the current position should get the
- // previous term in the sequence.
- for (int current_pos = last_pos;
- itr->ResetToTermEndingBefore(current_pos).ok();
- current_pos = itr->GetTerm().data() - text_begin) {
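+  // 1000 is simply an offset past the end of every text used in these tests.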
+ bool is_ok = itr->ResetToTermEndingBeforeUtf32(1000).ok();
+ while (is_ok) {
terms.push_back(itr->GetTerm());
+ // Calling ResetToTermEndingBeforeUtf32 with the current position should get
+ // the previous term in the sequence.
+ CharacterIterator char_itr = itr->CalculateTermStart().ValueOrDie();
+ is_ok = itr->ResetToTermEndingBeforeUtf32(char_itr.utf32_index()).ok();
}
return terms;
}
+class ReverseJniLanguageSegmenterTest
+ : public testing::TestWithParam<const char*> {
+ protected:
+ static std::string GetLocale() { return GetParam(); }
+
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
+};
+
} // namespace
TEST_P(ReverseJniLanguageSegmenterTest, EmptyText) {
@@ -182,7 +185,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, Non_ASCII_Non_Alphabetic) {
   // Full-width (non-ASCII) punctuation marks and special characters are now
   // returned as individual terms.
EXPECT_THAT(language_segmenter->GetAllTerms("。?·Hello!×"),
- IsOkAndHolds(ElementsAre("Hello")));
+ IsOkAndHolds(ElementsAre("。", "?", "·", "Hello", "!", "×")));
}
TEST_P(ReverseJniLanguageSegmenterTest, Acronym) {
@@ -225,6 +228,36 @@ TEST_P(ReverseJniLanguageSegmenterTest, WordConnector) {
EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android:icing"),
IsOkAndHolds(ElementsAre("com.google.android:icing")));
+  // A connector that doesn't have valid terms on both sides of it is not
+  // considered a connector.
+ EXPECT_THAT(language_segmenter->GetAllTerms(":bar:baz"),
+ IsOkAndHolds(ElementsAre(":", "bar:baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:"),
+ IsOkAndHolds(ElementsAre("bar:baz", ":")));
+
+  // A connector that doesn't have valid terms on both sides of it is not
+  // considered a connector.
+ EXPECT_THAT(language_segmenter->GetAllTerms(" :bar:baz"),
+ IsOkAndHolds(ElementsAre(" ", ":", "bar:baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz: "),
+ IsOkAndHolds(ElementsAre("bar:baz", ":", " ")));
+
+ // Connectors don't connect if one side is an invalid term (?)
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:?"),
+ IsOkAndHolds(ElementsAre("bar:baz", ":", "?")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("?:bar:baz"),
+ IsOkAndHolds(ElementsAre("?", ":", "bar:baz")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("3:14"),
+ IsOkAndHolds(ElementsAre("3", ":", "14")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("私:は"),
+ IsOkAndHolds(ElementsAre("私", ":", "は")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("我:每"),
+ IsOkAndHolds(ElementsAre("我", ":", "每")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("เดิน:ไป"),
+ IsOkAndHolds(ElementsAre("เดิน:ไป")));
+
   // Any leading and trailing characters are not connectors
EXPECT_THAT(language_segmenter->GetAllTerms(".com.google.android."),
IsOkAndHolds(ElementsAre(".", "com.google.android", ".")));
@@ -333,6 +366,17 @@ TEST_P(ReverseJniLanguageSegmenterTest, Number) {
IsOkAndHolds(ElementsAre("-", "123")));
}
+TEST_P(ReverseJniLanguageSegmenterTest, FullWidthNumbers) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("0123456789"),
+ IsOkAndHolds(ElementsAre("0", "1", "2", "3", "4", "5", "6",
+ "7", "8", "9")));
+}
+
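Each full-width digit (U+FF10 through U+FF19) occupies three UTF-8 bytes, and unlike its ASCII counterpart it is segmented as a term of its own above. A quick compile-time check of that encoding width:

    // "０" is U+FF10 (FULLWIDTH DIGIT ZERO): three UTF-8 bytes plus the NUL.
    static_assert(sizeof(u8"０") == 4, "U+FF10 encodes as 3 UTF-8 bytes");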
TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespaces) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
@@ -369,15 +413,16 @@ TEST_P(ReverseJniLanguageSegmenterTest, CJKT) {
// have whitespaces as word delimiter.
// Chinese
- EXPECT_THAT(language_segmenter->GetAllTerms("我每天走路去上班。"),
- IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班")));
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("我每天走路去上班。"),
+ IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班", "。")));
// Japanese
EXPECT_THAT(language_segmenter->GetAllTerms("私は毎日仕事に歩いています。"),
IsOkAndHolds(ElementsAre("私", "は", "毎日", "仕事", "に", "歩",
- "い", "てい", "ます")));
+ "い", "てい", "ます", "。")));
// Khmer
EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
- IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ")));
+ IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ", "។")));
// Thai
EXPECT_THAT(
language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"),
@@ -393,7 +438,6 @@ TEST_P(ReverseJniLanguageSegmenterTest, LatinLettersWithAccents) {
IsOkAndHolds(ElementsAre("āăąḃḅḇčćç")));
}
-// TODO(samzheng): test cases for more languages (e.g. top 20 in the world)
TEST_P(ReverseJniLanguageSegmenterTest, WhitespaceSplitLanguages) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
@@ -408,7 +452,6 @@ TEST_P(ReverseJniLanguageSegmenterTest, WhitespaceSplitLanguages) {
IsOkAndHolds(ElementsAre("나는", " ", "매일", " ", "출근합니다", ".")));
}
-// TODO(samzheng): more mixed languages test cases
TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguages) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
@@ -443,7 +486,78 @@ TEST_P(ReverseJniLanguageSegmenterTest, NotCopyStrings) {
EXPECT_THAT(word2_address, Eq(word2_result_address));
}
-TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) {
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartUtf32WordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "com:google:android is package";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "com:google:android is package"
+ // ^ ^^ ^^
+ // UTF-8 idx: 0 18 19 21 22
+ // UTF-32 idx: 0 18 19 21 22
+ auto position_or = itr->ResetToStartUtf32();
+ EXPECT_THAT(position_or, IsOk());
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+}
+
+TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
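The UTF-8/UTF-32 rows in these comments follow from straight code-point counting. A small sketch that reproduces both columns, assuming ICU's unicode/utf8.h macros, which this library already depends on:

    #include <cstdint>
    #include <string_view>
    #include <utility>
    #include <vector>

    #include "unicode/utf8.h"  // U8_NEXT

    // Returns (utf8_offset, utf32_index) for each code point in text. For the
    // string above, it maps byte offset 35 (か) to code-point index 19.
    std::vector<std::pair<int32_t, int32_t>> CodePointOffsets(
        std::string_view text) {
      std::vector<std::pair<int32_t, int32_t>> offsets;
      int32_t i = 0;
      int32_t cp = 0;
      const int32_t len = static_cast<int32_t>(text.length());
      while (i < len) {
        offsets.push_back({i, cp++});
        UChar32 c;
        U8_NEXT(text.data(), i, len, c);  // advances i past one code point
      }
      return offsets;
    }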
+TEST_P(ReverseJniLanguageSegmenterTest, IteratorOneAdvanceResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_TRUE(itr->Advance()); // itr points to 'How'
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(ReverseJniLanguageSegmenterTest,
+ IteratorMultipleAdvancesResetToStartUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_TRUE(itr->Advance());
+ ASSERT_TRUE(itr->Advance());
+ ASSERT_TRUE(itr->Advance());
+ ASSERT_TRUE(itr->Advance()); // itr points to ' '
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(ReverseJniLanguageSegmenterTest, IteratorDoneResetToStartUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -451,19 +565,61 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
segmenter->Segment(kText));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- ASSERT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8)));
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ while (itr->Advance()) {
+ // Do nothing.
+ }
+ EXPECT_THAT(itr->ResetToStartUtf32(), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
+}
+
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32WordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "package com:google:android name";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package com:google:android name"
+ // ^ ^^ ^^
+ // UTF-8 idx: 0 7 8 26 27
+ // UTF-32 idx: 0 7 8 26 27
+ auto position_or = itr->ResetToTermStartingAfterUtf32(8);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(26));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+
+ position_or = itr->ResetToTermStartingAfterUtf32(7);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(8));
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+}
+
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32OutOfBounds) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "How are you你好吗お元気ですか";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8)));
ASSERT_THAT(itr->GetTerm(), Eq("you"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(-1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(itr->GetTerm(), Eq("you"));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(-1), IsOk());
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(kText.length()),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(21),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(itr->GetTerm(), Eq("you"));
+ EXPECT_THAT(itr->GetTerm(), Eq("How"));
}
// Tests that ResetToTermAfter and Advance produce the same output. With the
@@ -472,7 +628,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) {
// terms produced by ResetToTermAfter calls with the current position
// provided as the argument.
TEST_P(ReverseJniLanguageSegmenterTest,
- MixedLanguagesResetToTermAfterEquivalentToAdvance) {
+ MixedLanguagesResetToTermAfterUtf32EquivalentToAdvance) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -487,14 +643,14 @@ TEST_P(ReverseJniLanguageSegmenterTest,
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetAfter(reset_to_term_itr.get());
+ GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
TEST_P(ReverseJniLanguageSegmenterTest,
- ThaiResetToTermAfterEquivalentToAdvance) {
+ ThaiResetToTermAfterUtf32EquivalentToAdvance) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -509,14 +665,14 @@ TEST_P(ReverseJniLanguageSegmenterTest,
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetAfter(reset_to_term_itr.get());
+ GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
TEST_P(ReverseJniLanguageSegmenterTest,
- KoreanResetToTermAfterEquivalentToAdvance) {
+ KoreanResetToTermAfterUtf32EquivalentToAdvance) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -531,7 +687,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetAfter(reset_to_term_itr.get());
+ GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
@@ -542,7 +698,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
// should be able to mix ResetToTermAfter(current_position) calls and Advance
// calls to mimic calling Advance.
TEST_P(ReverseJniLanguageSegmenterTest,
- MixedLanguagesResetToTermAfterInteroperableWithAdvance) {
+ MixedLanguagesResetToTermAfterUtf32InteroperableWithAdvance) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -557,7 +713,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
segmenter->Segment(kText));
std::vector<std::string_view> advance_and_reset_terms =
- GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get());
+ GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
EXPECT_THAT(advance_and_reset_terms,
testing::ElementsAreArray(advance_terms));
@@ -565,7 +721,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
}
TEST_P(ReverseJniLanguageSegmenterTest,
- ThaiResetToTermAfterInteroperableWithAdvance) {
+ ThaiResetToTermAfterUtf32InteroperableWithAdvance) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -580,7 +736,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> advance_and_reset_terms =
- GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get());
+ GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
EXPECT_THAT(advance_and_reset_terms,
testing::ElementsAreArray(advance_terms));
@@ -588,7 +744,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
}
TEST_P(ReverseJniLanguageSegmenterTest,
- KoreanResetToTermAfterInteroperableWithAdvance) {
+ KoreanResetToTermAfterUtf32InteroperableWithAdvance) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -603,14 +759,14 @@ TEST_P(ReverseJniLanguageSegmenterTest,
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> advance_and_reset_terms =
- GetAllTermsAdvanceAndResetAfter(advance_and_reset_itr.get());
+ GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
EXPECT_THAT(advance_and_reset_terms,
testing::ElementsAreArray(advance_terms));
EXPECT_THAT(advance_and_reset_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
-TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfter) {
+TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfterUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -619,33 +775,35 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfter) {
std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment("How are you你好吗お元気ですか"));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(3)));
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(11)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(11)));
EXPECT_THAT(itr->GetTerm(), Eq("你好"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(8)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("you"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(32), IsOkAndHolds(Eq(35)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(18), IsOkAndHolds(Eq(19)));
EXPECT_THAT(itr->GetTerm(), Eq("か"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(17)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13)));
EXPECT_THAT(itr->GetTerm(), Eq("吗"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(35),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
}
-TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespacesResetToTermAfter) {
+TEST_P(ReverseJniLanguageSegmenterTest,
+ ContinuousWhitespacesResetToTermAfterUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -655,35 +813,36 @@ TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespacesResetToTermAfter) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kTextWithSpace));
- // String: "Hello World"
- // ^ ^ ^
- // Bytes: 0 5 15
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(5)));
+ // String: "Hello World"
+ // ^ ^ ^
+ // UTF-8 idx: 0 5 15
+ // UTF-32 idx: 0 5 15
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(2), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermStartingAfter(10), IsOkAndHolds(Eq(15)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(10), IsOkAndHolds(Eq(15)));
EXPECT_THAT(itr->GetTerm(), Eq("World"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(5), IsOkAndHolds(Eq(15)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(5), IsOkAndHolds(Eq(15)));
EXPECT_THAT(itr->GetTerm(), Eq("World"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(15),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(17),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(19),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(19),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
}
-TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfter) {
+TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfterUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -693,21 +852,25 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfter) {
constexpr std::string_view kChinese = "我每天走路去上班。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kChinese));
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // Bytes: 0 3 9 15 18
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3)));
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^ ^
+ // UTF-8 idx: 0 3 9 15 18 24
+  // UTF-32 idx:   0 1   3  5  6  8
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1)));
EXPECT_THAT(itr->GetTerm(), Eq("每天"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(9)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(2), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq("走路"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(19),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(7), IsOkAndHolds(Eq(8)));
+ EXPECT_THAT(itr->GetTerm(), Eq("。"));
+
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(8),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
}
-TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfter) {
+TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfterUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -716,21 +879,25 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfter) {
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kJapanese));
- // String: "私は毎日仕事に歩いています。"
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 6 12 18212427 33
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(3)));
+ // String: "私は毎日仕事に歩いています。"
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 6 12 18212427 33 39
+ // UTF-32 idx: 0 1 2 4 6 7 8 9 11 13
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(1)));
EXPECT_THAT(itr->GetTerm(), Eq("は"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(33),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(13),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(7), IsOkAndHolds(Eq(12)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(3), IsOkAndHolds(Eq(4)));
EXPECT_THAT(itr->GetTerm(), Eq("仕事"));
+
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(13)));
+ EXPECT_THAT(itr->GetTerm(), Eq("。"));
}
-TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfter) {
+TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfterUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -738,21 +905,25 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfter) {
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kKhmer));
- // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
- // ^ ^ ^ ^
- // Bytes: 0 9 24 45
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9)));
+ // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
+ // ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 9 24 45 69
+ // UTF-32 idx: 0 3 8 15 23
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq("ដើរទៅ"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(47),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(15), IsOkAndHolds(Eq(23)));
+ EXPECT_THAT(itr->GetTerm(), Eq("។"));
+
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(23),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(14), IsOkAndHolds(Eq(24)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ"));
}
-TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfter) {
+TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfterUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -761,24 +932,48 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfter) {
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kThai));
- // String: "ฉันเดินไปทำงานทุกวัน"
- // ^ ^ ^ ^ ^ ^
- // Bytes: 0 9 21 27 42 51
- EXPECT_THAT(itr->ResetToTermStartingAfter(0), IsOkAndHolds(Eq(9)));
+ // String: "ฉันเดินไปทำงานทุกวัน"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 9 21 27 42 51
+ // UTF-32 idx: 0 3 7 9 14 17
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(0), IsOkAndHolds(Eq(3)));
EXPECT_THAT(itr->GetTerm(), Eq("เดิน"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(51),
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(17),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermStartingAfter(13), IsOkAndHolds(Eq(21)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(6), IsOkAndHolds(Eq(7)));
EXPECT_THAT(itr->GetTerm(), Eq("ไป"));
- EXPECT_THAT(itr->ResetToTermStartingAfter(34), IsOkAndHolds(Eq(42)));
+ EXPECT_THAT(itr->ResetToTermStartingAfterUtf32(12), IsOkAndHolds(Eq(14)));
EXPECT_THAT(itr->GetTerm(), Eq("ทุก"));
}
-TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) {
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeWordConnectorUtf32) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "package name com:google:android!";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package name com:google:android!"
+ // ^ ^^ ^^ ^
+ // UTF-8 idx: 0 7 8 12 13 31
+ // UTF-32 idx: 0 7 8 12 13 31
+ auto position_or = itr->ResetToTermEndingBeforeUtf32(31);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(13));
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+
+ position_or = itr->ResetToTermEndingBeforeUtf32(21);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(12));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+}
+
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBoundsUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -786,19 +981,19 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
segmenter->Segment(kText));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- ASSERT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4)));
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ ASSERT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4)));
ASSERT_THAT(itr->GetTerm(), Eq("are"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(-1),
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(-1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(itr->GetTerm(), Eq("are"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(kText.length()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(itr->GetTerm(), Eq("are"));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(29), IsOk());
+ EXPECT_THAT(itr->GetTerm(), Eq("か"));
}
// Tests that ResetToTermBefore and Advance produce the same output. With the
@@ -807,7 +1002,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) {
// terms produced by ResetToTermBefore calls with the current position
// provided as the argument (after their order has been reversed).
TEST_P(ReverseJniLanguageSegmenterTest,
- MixedLanguagesResetToTermBeforeEquivalentToAdvance) {
+ MixedLanguagesResetToTermBeforeEquivalentToAdvanceUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -817,17 +1012,12 @@ TEST_P(ReverseJniLanguageSegmenterTest,
segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
- // Can't produce the last term via calls to ResetToTermBefore. So skip
- // past that one.
- auto itr = advance_terms.begin();
- std::advance(itr, advance_terms.size() - 1);
- advance_terms.erase(itr);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetBefore(reset_to_term_itr.get());
+ GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
@@ -836,7 +1026,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
}
TEST_P(ReverseJniLanguageSegmenterTest,
- ThaiResetToTermBeforeEquivalentToAdvance) {
+ ThaiResetToTermBeforeEquivalentToAdvanceUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -846,17 +1036,12 @@ TEST_P(ReverseJniLanguageSegmenterTest,
segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
- // Can't produce the last term via calls to ResetToTermBefore. So skip
- // past that one.
- auto itr = advance_terms.begin();
- std::advance(itr, advance_terms.size() - 1);
- advance_terms.erase(itr);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetBefore(reset_to_term_itr.get());
+ GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
@@ -864,7 +1049,7 @@ TEST_P(ReverseJniLanguageSegmenterTest,
}
TEST_P(ReverseJniLanguageSegmenterTest,
- KoreanResetToTermBeforeEquivalentToAdvance) {
+ KoreanResetToTermBeforeEquivalentToAdvanceUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
@@ -874,24 +1059,19 @@ TEST_P(ReverseJniLanguageSegmenterTest,
segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
- // Can't produce the last term via calls to ResetToTermBefore. So skip
- // past that one.
- auto itr = advance_terms.begin();
- std::advance(itr, advance_terms.size() - 1);
- advance_terms.erase(itr);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
- GetAllTermsResetBefore(reset_to_term_itr.get());
+ GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
EXPECT_THAT(reset_terms, testing::ElementsAreArray(advance_terms));
EXPECT_THAT(reset_to_term_itr->GetTerm(), Eq(advance_itr->GetTerm()));
}
-TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBefore) {
+TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBeforeUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -900,35 +1080,36 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBefore) {
std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment("How are you你好吗お元気ですか"));
- // String: "How are you你好吗お元気ですか"
- // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 4 7 8 11 172023 29 35
- EXPECT_THAT(itr->ResetToTermEndingBefore(2),
+ // String: "How are you你好吗お元気ですか"
+ // ^ ^^ ^^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 4 7 8 11 172023 29 35
+ // UTF-32 idx: 0 3 4 7 8 11 131415 17 19
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(7)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(7)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(4)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(4)));
EXPECT_THAT(itr->GetTerm(), Eq("are"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(32), IsOkAndHolds(Eq(23)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(18), IsOkAndHolds(Eq(15)));
EXPECT_THAT(itr->GetTerm(), Eq("元気"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(8)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(12), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("you"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(35), IsOkAndHolds(Eq(29)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(17)));
EXPECT_THAT(itr->GetTerm(), Eq("です"));
}
TEST_P(ReverseJniLanguageSegmenterTest,
- ContinuousWhitespacesResetToTermBefore) {
+ ContinuousWhitespacesResetToTermBeforeUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -938,34 +1119,35 @@ TEST_P(ReverseJniLanguageSegmenterTest,
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kTextWithSpace));
- // String: "Hello World"
- // ^ ^ ^
- // Bytes: 0 5 15
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "Hello World"
+ // ^ ^ ^
+ // UTF-8 idx: 0 5 15
+ // UTF-32 idx: 0 5 15
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(2),
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(10), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(10), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("Hello"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(5), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("Hello"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(15), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(15), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermEndingBefore(17), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
- EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(5)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(19), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq(" "));
}
-TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBefore) {
+TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBeforeUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -975,21 +1157,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBefore) {
constexpr std::string_view kChinese = "我每天走路去上班。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kChinese));
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // Bytes: 0 3 9 15 18
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF-8 idx: 0 3 9 15 18
+ // UTF-32 idx: 0 1 3 5 6
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(2), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("我"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(19), IsOkAndHolds(Eq(15)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(7), IsOkAndHolds(Eq(5)));
EXPECT_THAT(itr->GetTerm(), Eq("去"));
}
-TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBefore) {
+TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBeforeUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -998,21 +1181,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBefore) {
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kJapanese));
- // String: "私は毎日仕事に歩いています。"
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // Bytes: 0 3 6 12 18212427 33
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "私は毎日仕事に歩いています。"
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 3 6 12 18212427 33
+ // UTF-32 idx: 0 1 2 4 6 7 8 9 11
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(33), IsOkAndHolds(Eq(27)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(9)));
EXPECT_THAT(itr->GetTerm(), Eq("てい"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(7), IsOkAndHolds(Eq(3)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(3), IsOkAndHolds(Eq(1)));
EXPECT_THAT(itr->GetTerm(), Eq("は"));
}
-TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBefore) {
+TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBeforeUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -1020,21 +1204,22 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBefore) {
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kKhmer));
- // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
- // ^ ^ ^ ^
- // Bytes: 0 9 24 45
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
+ // ^ ^ ^ ^
+ // UTF-8 idx: 0 9 24 45
+ // UTF-32 idx: 0 3 8 15
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(47), IsOkAndHolds(Eq(24)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(16), IsOkAndHolds(Eq(8)));
EXPECT_THAT(itr->GetTerm(), Eq("ធ្វើការ"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(14), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(5), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("ញុំ"));
}
-TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBefore) {
+TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBeforeUtf32) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(
@@ -1043,23 +1228,39 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBefore) {
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
language_segmenter->Segment(kThai));
- // String: "ฉันเดินไปทำงานทุกวัน"
- // ^ ^ ^ ^ ^ ^
- // Bytes: 0 9 21 27 42 51
- EXPECT_THAT(itr->ResetToTermEndingBefore(0),
+ // String: "ฉันเดินไปทำงานทุกวัน"
+ // ^ ^ ^ ^ ^ ^
+ // UTF-8 idx: 0 9 21 27 42 51
+ // UTF-32 idx: 0 3 7 9 14 17
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(0),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(itr->GetTerm(), IsEmpty());
- EXPECT_THAT(itr->ResetToTermEndingBefore(51), IsOkAndHolds(Eq(42)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(17), IsOkAndHolds(Eq(14)));
EXPECT_THAT(itr->GetTerm(), Eq("ทุก"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(13), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(4), IsOkAndHolds(Eq(0)));
EXPECT_THAT(itr->GetTerm(), Eq("ฉัน"));
- EXPECT_THAT(itr->ResetToTermEndingBefore(34), IsOkAndHolds(Eq(21)));
+ EXPECT_THAT(itr->ResetToTermEndingBeforeUtf32(11), IsOkAndHolds(Eq(7)));
EXPECT_THAT(itr->GetTerm(), Eq("ไป"));
}
+TEST_P(ReverseJniLanguageSegmenterTest, QuerySyntax) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_segmenter,
+ language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ // Validates that query syntax characters are segmented as separate terms.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<std::string_view> terms,
+ language_segmenter->GetAllTerms(
+ "(-term1 OR term2) AND property1.subproperty2:term3"));
+ EXPECT_THAT(terms, ElementsAre("(", "-", "term1", " ", "OR", " ", "term2",
+ ")", " ", "AND", " ", "property1", ".",
+ "subproperty2", ":", "term3"));
+}
+
INSTANTIATE_TEST_SUITE_P(
LocaleName, ReverseJniLanguageSegmenterTest,
testing::Values(ULOC_US, ULOC_UK, ULOC_CANADA, ULOC_CANADA_FRENCH,
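
Note on the Utf32 APIs exercised above: they take UTF-32 (code point) offsets, not UTF-8 byte offsets. A minimal sketch (not part of this patch) of deriving one from the other, assuming only the CharacterIterator calls used later in this patch (AdvanceToUtf8 and utf32_index):

#include <cstdint>
#include <string_view>

#include "icing/util/character-iterator.h"

// Sketch: maps a UTF-8 byte offset in `text` to the corresponding UTF-32
// (code point) offset; error handling elided.
int32_t Utf8ToUtf32Offset(std::string_view text, int utf8_offset) {
  icing::lib::CharacterIterator it(text);
  it.AdvanceToUtf8(utf8_offset);  // Decodes forward one code point at a time.
  return it.utf32_index();
}

For "我每天走路去上班。", this maps byte offset 3 to code-point offset 1, consistent with the index tables in the tests above.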
diff --git a/icing/tokenization/rfc822-tokenizer.cc b/icing/tokenization/rfc822-tokenizer.cc
new file mode 100644
index 0000000..13c58c5
--- /dev/null
+++ b/icing/tokenization/rfc822-tokenizer.cc
@@ -0,0 +1,798 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/rfc822-tokenizer.h"
+
+#include <algorithm>
+#include <deque>
+#include <queue>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/tokenization/token.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/util/character-iterator.h"
+#include "icing/util/i18n-utils.h"
+#include "icing/util/status-macros.h"
+#include "unicode/umachine.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+bool IsDelimiter(UChar32 c) { return c == ',' || c == ';' || c == '\n'; }
+} // namespace
+
+class Rfc822TokenIterator : public Tokenizer::Iterator {
+ public:
+ // The cursor is the index into the string_view; text_end_ is its length.
+ explicit Rfc822TokenIterator(std::string_view text)
+ : text_(std::move(text)),
+ iterator_(text, 0, 0, 0),
+ text_end_(text.length()),
+ token_index_(-1) {}
+
+ // Advance moves token_index_ forward, possibly past the end of tokens_.
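+ // For example (mirroring the EmptyMiddleToken test in this patch), given
+ // "<alex>,,<tom>": the first Advance() stops on the RFC822_TOKEN "<alex>",
+ // the second stops on "<tom>", and a third call returns false once the
+ // input is exhausted.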
+ bool Advance() override {
+ // Stop the token index on an RFC822 token, or one past the end, where the
+ // next RFC822 token will be if more are generated.
+ do {
+ token_index_++;
+ } while (token_index_ < tokens_.size() &&
+ tokens_[token_index_].type != Token::Type::RFC822_TOKEN);
+
+ // There is still something left; this is possible if we rewound and then called Advance.
+ if (token_index_ < tokens_.size()) {
+ return true;
+ }
+
+ // Done with the entire string_view.
+ if (iterator_.utf8_index() >= text_end_) {
+ return false;
+ }
+
+ // Parsing a new email; skip any leading whitespace first.
+ AdvancePastWhitespace();
+
+ // This may return false, as in the case of "<alex>,,", where after
+ // processing <alex>, there are no more tokens.
+ return GetNextRfc822Token();
+ }
+
+ // Returns the current token group: an RFC822_TOKEN along with all of its
+ // subtokens. For example, "tim@google.com" will return all tokens generated
+ // from that text.
+ //
+ // Returns:
+ // A vector of Tokens on success
+ // An empty vector if the token list is empty
+ // An empty vector if the index is past the end of the token list
+ std::vector<Token> GetTokens() const override {
+ std::vector<Token> result;
+ if (token_index_ < tokens_.size() && token_index_ >= 0) {
+ int index = token_index_;
+ do {
+ result.push_back(tokens_[index]);
+ } while (++index < tokens_.size() &&
+ tokens_[index].type != Token::Type::RFC822_TOKEN);
+ }
+ return result;
+ }
+
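+ // Resets token_index_ to the first RFC822_TOKEN parsed so far whose text
+ // starts after the given UTF-32 offset. Returns false if there is no such
+ // token.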
+ bool ResetToTokenStartingAfter(int32_t utf32_offset) override {
+ CharacterIterator tracker(text_);
+ for (int new_index = 0; new_index < tokens_.size(); ++new_index) {
+ const Token& t = tokens_[new_index];
+ if (t.type != Token::Type::RFC822_TOKEN) {
+ continue;
+ }
+
+ tracker.AdvanceToUtf8(t.text.begin() - text_.begin());
+ if (tracker.utf32_index() > utf32_offset) {
+ token_index_ = new_index;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ // Attempts to reset token_index_ to point to the last RFC822 token ending
+ // before the given offset. If it fails because no tokens end before the
+ // offset, token_index_ will become -1.
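+ // For example, in "<alex>,,<tom>" a call with utf32_offset 7 resets the
+ // iterator to the RFC822_TOKEN "<alex>", whose text ends at offset 6.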
+ bool ResetToTokenEndingBefore(int32_t utf32_offset) override {
+ // First, advance until we pass offset or Advance is false
+ if (tokens_.empty()) {
+ if (!Advance()) {
+ // No tokens available, and Advancing doesn't get more, so return false.
+ return false;
+ }
+ }
+
+ CharacterIterator tracker(text_);
+
+ // Keep advancing until we parse all the emails, or run past the offset.
+ // Advance will always make token_index_ point to an RFC822_TOKEN, so we can
+ // look at that tokens text end to determine if it ends before the offset.
+ // This first loop will guarantee that we end up either past the offset or
+ // at the end.
+ do {
+ tracker.AdvanceToUtf8(tokens_[token_index_].text.end() - text_.begin());
+
+ // When we Advance and have to convert names to email addresses, it's
+ // possible that multiple RFC822 tokens are added. We need to advance
+ // through these one at a time; we cannot skip to the top of the line.
+ } while (tracker.utf32_index() <= utf32_offset && Advance());
+
+ // We are either past the offset or at the end. Either way, we now work
+ // backwards and reset to the first (highest index) RFC822_TOKEN we find.
+ while (--token_index_ >= 0) {
+ if (tokens_[token_index_].type != Token::Type::RFC822_TOKEN) {
+ continue;
+ }
+
+ tracker.MoveToUtf8(tokens_[token_index_].text.end() - text_.begin());
+ if (tracker.utf32_index() <= utf32_offset) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Returns a character iterator to the start of the token.
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart()
+ override {
+ CharacterIterator token_start = iterator_;
+ token_start.MoveToUtf8(GetTokens().at(0).text.begin() - text_.begin());
+ return token_start;
+ }
+
+ // Returns a character iterator to right after the end of the token.
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenEndExclusive()
+ override {
+ CharacterIterator token_end = iterator_;
+ token_end.MoveToUtf8(GetTokens().at(0).text.end() - text_.begin());
+ return token_end;
+ }
+
+ // Reset to start moves to the state we're in after the first Advance().
+ bool ResetToStart() override {
+ token_index_ = -1;
+ return Advance();
+ }
+
+ private:
+ // Advance until the next email delimiter, generating as many tokens as
+ // necessary.
+ bool GetNextRfc822Token() {
+ if (iterator_.utf8_index() >= text_end_) {
+ return false;
+ }
+
+ int token_start = iterator_.utf8_index();
+ bool address_found = false;
+ bool name_found = false;
+ std::vector<Token> next_tokens;
+ Token rfc822(Token::Type::RFC822_TOKEN);
+
+ // We start in the unquoted state and run until one of: " , ; \n < (
+ while (iterator_.utf8_index() < text_end_) {
+ UChar32 c = iterator_.GetCurrentChar();
+ if (IsDelimiter(c)) {
+ // End of the token, advance cursor past all delimiters then quit.
+ rfc822.text =
+ text_.substr(token_start, iterator_.utf8_index() - token_start);
+
+ UChar32 delimiter;
+ do {
+ AdvanceCursor();
+ delimiter = iterator_.GetCurrentChar();
+ // GetCurrentChar at the end of the text is not a delimiter, so this
+ // loop will terminate.
+ } while (IsDelimiter(delimiter));
+
+ break;
+ }
+
+ std::vector<Token> consume_result;
+ if (c == '"') {
+ consume_result = ConsumeQuotedSection();
+ name_found |= !consume_result.empty();
+ } else if (c == '(') {
+ consume_result = ConsumeParenthesizedSection();
+ } else if (c == '<') {
+ // Only set address_found to true if ConsumeAddress returns tokens.
+ // Otherwise, keep address_found as is so that a later empty result
+ // cannot flip it back to false.
+ consume_result = ConsumeAddress();
+ address_found |= !consume_result.empty();
+ } else {
+ consume_result = ConsumeUnquotedSection();
+ name_found |= !consume_result.empty();
+ }
+ next_tokens.insert(next_tokens.end(), consume_result.begin(),
+ consume_result.end());
+ }
+ if (iterator_.utf8_index() >= text_end_) {
+ rfc822.text = text_.substr(token_start, text_end_ - token_start);
+ }
+
+ // If an address is found, use the tokens we have.
+ // If an address isn't found, and a name isn't found, also use the tokens
+ // we have.
+ // If an address isn't found but a name is, convert name Tokens to email
+ // Tokens.
+ if (!address_found && name_found) {
+ // We don't add the rfc822 token, as it will be handled by
+ // ConvertNameToEmail.
+ std::vector<Token> converted_tokens = ConvertNameToEmail(next_tokens);
+ tokens_.insert(tokens_.end(), converted_tokens.begin(),
+ converted_tokens.end());
+ } else {
+ if (next_tokens.empty()) {
+ // Tokens may not be generated in the case of ",,,,,,"
+ return false;
+ } else {
+ // If tokens were generated, push back the RFC822 token for them
+ tokens_.push_back(rfc822);
+ tokens_.insert(tokens_.end(), next_tokens.begin(), next_tokens.end());
+ }
+ }
+
+ return true;
+ }
+
+ // We allow for the "First Last <email>" format, but if there is no email in
+ // brackets, we won't allow for unquoted spaces. For example, the input
+ // "alex@google.com tim@google.com" has an unquoted space, so we will split
+ // it into two emails. We don't need to find more tokens; we just need to
+ // find @ signs and spaces and convert name tokens to parts of the email.
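+ // For example (consistent with the NoBrackets test in this patch),
+ // "alex@google.com tim@google.com" yields two RFC822_TOKENs, each followed
+ // by its own RFC822_ADDRESS_COMPONENT_LOCAL and _HOST tokens and then its
+ // RFC822_ADDRESS, RFC822_LOCAL_ADDRESS, and RFC822_HOST_ADDRESS tokens.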
+ std::vector<Token> ConvertNameToEmail(std::vector<Token>& name_tokens) {
+ if (name_tokens.empty()) {
+ return name_tokens;
+ }
+
+ // There will only be names and comments, and they will be in order.
+ std::vector<Token> converted_tokens;
+
+ // Start at the beginning of the current email.
+ CharacterIterator scanner(text_);
+
+ scanner.MoveToUtf8(name_tokens[0].text.begin() - text_.begin());
+ int token_processed_index = 0;
+
+ bool in_quote = false;
+ // at_sign_index starts as nullptr; it is only set to something else if
+ // we find an @ sign.
+ const char* at_sign_index = nullptr;
+
+ // Run to the end
+ while (scanner.utf8_index() < iterator_.utf8_index()) {
+ const char* end_of_token = nullptr;
+ UChar32 c = scanner.GetCurrentChar();
+ if (c == '\\') {
+ // Skip the backslash, as well as the character it escapes.
+ scanner.AdvanceToUtf32(scanner.utf32_index() + 1);
+ scanner.AdvanceToUtf32(scanner.utf32_index() + 1);
+ continue;
+ }
+ if (c == '"') {
+ in_quote = !in_quote;
+ }
+ if (c == '@') {
+ at_sign_index = text_.begin() + scanner.utf8_index();
+ }
+
+ // If the next character is the end OR we hit an unquoted space.
+ if (scanner.utf8_index() + i18n_utils::GetUtf8Length(c) ==
+ iterator_.utf8_index() ||
+ (!in_quote && c == ' ')) {
+ if (!in_quote && c == ' ') {
+ end_of_token = text_.begin() + scanner.utf8_index();
+ } else {
+ end_of_token = text_.begin() + iterator_.utf8_index();
+ }
+ std::deque<Token> more_tokens = ConvertOneNameToEmail(
+ name_tokens, at_sign_index, end_of_token, token_processed_index);
+ converted_tokens.insert(converted_tokens.end(), more_tokens.begin(),
+ more_tokens.end());
+ // Reset the at_sign_index
+ at_sign_index = nullptr;
+ }
+ scanner.AdvanceToUtf32(scanner.utf32_index() + 1);
+ }
+
+ // Flush any remaining tokens that were not handled in the loop above.
+ if (token_processed_index < name_tokens.size()) {
+ std::deque<Token> more_tokens =
+ ConvertOneNameToEmail(name_tokens, at_sign_index,
+ name_tokens[name_tokens.size() - 1].text.end(),
+ token_processed_index);
+ converted_tokens.insert(converted_tokens.end(), more_tokens.begin(),
+ more_tokens.end());
+ }
+
+ return converted_tokens;
+ }
+
+ // Once a name is determined to be an address, convert its tokens to address
+ // tokens.
+ std::deque<Token> ConvertOneNameToEmail(const std::vector<Token>& name_tokens,
+ const char* at_sign_index,
+ const char* end_of_token,
+ int& token_processed_index) {
+ const char* address_start = nullptr;
+ const char* local_address_end = nullptr;
+ const char* host_address_start = nullptr;
+ const char* address_end = nullptr;
+ const char* token_start = nullptr;
+ const char* token_end = nullptr;
+ std::deque<Token> converted_tokens;
+
+ // Transform tokens up to the end_of_token pointer.
+
+ for (; token_processed_index < name_tokens.size();
+ ++token_processed_index) {
+ const Token& token = name_tokens[token_processed_index];
+
+ if (token.text.end() > end_of_token) {
+ break;
+ }
+ std::string_view text = token.text;
+ // We need to do this both for comment and name tokens. Comment tokens
+ // will get a corresponding RFC822 token, but not an address or local
+ // address.
+ if (token_start == nullptr) {
+ token_start = text.begin();
+ }
+ token_end = text.end();
+
+ if (token.type == Token::Type::RFC822_COMMENT) {
+ // Comment tokens will stay as they are.
+ converted_tokens.push_back(token);
+ } else if (token.type == Token::Type::RFC822_NAME) {
+ // Names need to be converted to address tokens. We keep the order in
+ // which the name tokens appeared. Name tokens that appear before an
+ // @ sign in the name will become RFC822_ADDRESS_COMPONENT_LOCAL, and
+ // those after will become RFC822_ADDRESS_COMPONENT_HOST. We aren't
+ // able to determine RFC822_ADDRESS, RFC822_LOCAL_ADDRESS, and
+ // RFC822_HOST_ADDRESS before checking the name tokens, so they will
+ // be added after the component tokens.
+ if (address_start == nullptr) {
+ address_start = text.begin();
+ }
+ address_end = text.end();
+ if (text.begin() > at_sign_index) {
+ if (host_address_start == nullptr) {
+ host_address_start = text.begin();
+ }
+ // Once this is hit, we switch to COMPONENT_HOST and mark end of the
+ // local address
+ converted_tokens.push_back(
+ Token(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, token.text));
+ } else {
+ local_address_end = text.end();
+ converted_tokens.push_back(
+ Token(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, token.text));
+ }
+ }
+ }
+
+ if (address_start != nullptr) {
+ converted_tokens.push_back(
+ Token(Token::Type::RFC822_ADDRESS,
+ std::string_view(address_start, address_end - address_start)));
+ if (local_address_end != nullptr) {
+ converted_tokens.push_back(
+ Token(Token::Type::RFC822_LOCAL_ADDRESS,
+ std::string_view(address_start,
+ local_address_end - address_start)));
+ }
+ }
+
+ if (host_address_start != nullptr && host_address_start < address_end) {
+ converted_tokens.push_back(
+ Token(Token::Type::RFC822_HOST_ADDRESS,
+ text_.substr(host_address_start - text_.begin(),
+ address_end - host_address_start)));
+ }
+
+ if (token_start != nullptr) {
+ converted_tokens.push_front(
+ Token(Token::Type::RFC822_TOKEN,
+ std::string_view(token_start, token_end - token_start)));
+ }
+
+ return converted_tokens;
+ }
+
+ // Returns name tokens in an unquoted section. This is useful in case we do
+ // not find an address and have to use the name. An unquoted section may look
+ // like "Alex Sav", or "alex@google.com". In the absense of a bracketed email
+ // address, the unquoted section will be used as the email address along with
+ // the quoted section.
+ std::vector<Token> ConsumeUnquotedSection() {
+ UChar32 c;
+
+ int token_start = -1;
+ std::vector<Token> next_tokens;
+
+ // Advance to another state or to a character marking the end of the
+ // token: one of \n , ;
+ while (iterator_.utf8_index() < text_end_) {
+ c = iterator_.GetCurrentChar();
+
+ if (i18n_utils::IsAlphaNumeric(c)) {
+ if (token_start == -1) {
+ // Start recording
+ token_start = iterator_.utf8_index();
+ }
+ AdvanceCursor();
+
+ } else {
+ if (token_start != -1) {
+ // The character is non-alphanumeric; save the pending token.
+ next_tokens.push_back(Token(
+ Token::Type::RFC822_NAME,
+ text_.substr(token_start, iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+
+ if (c == '"' || c == '<' || c == '(' || IsDelimiter(c)) {
+ // Stay on the token.
+ break;
+ }
+
+ AdvanceCursor();
+ }
+ }
+ if (token_start != -1) {
+ next_tokens.push_back(Token(
+ Token::Type::RFC822_NAME,
+ text_.substr(token_start, iterator_.utf8_index() - token_start)));
+ }
+ return next_tokens;
+ }
+
+ // Names that are within quotes should have all characters blindly
+ // unescaped. When a name is made into an address, it isn't re-escaped.
+
+ // Returns name tokens found in a quoted section. This is useful in case we do
+ // not find an address and have to use the name. The quoted section may
+ // contain whitespace.
+ std::vector<Token> ConsumeQuotedSection() {
+ // Get past the first quote.
+ AdvanceCursor();
+
+ bool end_quote_found = false;
+ std::vector<Token> next_tokens;
+ UChar32 c;
+
+ int token_start = -1;
+
+ while (!end_quote_found && (iterator_.utf8_index() < text_end_)) {
+ c = iterator_.GetCurrentChar();
+
+ if (i18n_utils::IsAlphaNumeric(c)) {
+ if (token_start == -1) {
+ // Start tracking the token.
+ token_start = iterator_.utf8_index();
+ }
+ AdvanceCursor();
+
+ } else {
+ // Non-alphanumeric.
+ if (c == '\\') {
+ // A backslash, let's look at the next character.
+ CharacterIterator temp = iterator_;
+ temp.AdvanceToUtf32(iterator_.utf32_index() + 1);
+ UChar32 n = temp.GetCurrentChar();
+ if (i18n_utils::IsAlphaNumeric(n)) {
+ // The next character is alphabetic, skip the slash and don't end
+ // the last token. For quoted sections, the only things that are
+ // escaped are double quotes and slashes. For example, in "a\lex",
+ // an l appears after the slash. We want to treat this as if it
+ // was just "alex". So we tokenize it as <RFC822_NAME, "a\lex">.
+ AdvanceCursor();
+ } else {
+ // Not alphabetic, so save the last token if necessary.
+ if (token_start != -1) {
+ next_tokens.push_back(
+ Token(Token::Type::RFC822_NAME,
+ text_.substr(token_start,
+ iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+
+ // Skip the backslash.
+ AdvanceCursor();
+
+ if (n == '"' || n == '\\' || n == '@') {
+ // Skip these too if they're next.
+ AdvanceCursor();
+ }
+ }
+ } else {
+ // Not a backslash.
+
+ if (token_start != -1) {
+ next_tokens.push_back(
+ Token(Token::Type::RFC822_NAME,
+ text_.substr(token_start,
+ iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+
+ if (c == '"') {
+ end_quote_found = true;
+ }
+ // Advance one more time to get past the non-alphabetic character.
+ AdvanceCursor();
+ }
+ }
+ }
+ if (token_start != -1) {
+ next_tokens.push_back(Token(
+ Token::Type::RFC822_NAME,
+ text_.substr(token_start, iterator_.utf8_index() - token_start)));
+ }
+ return next_tokens;
+ }
+
+ // '(', ')', '\\' chars should be escaped. All other escaped chars should be
+ // unescaped.
+ std::vector<Token> ConsumeParenthesizedSection() {
+ // Skip the initial (
+ AdvanceCursor();
+
+ int paren_layer = 1;
+ UChar32 c;
+ std::vector<Token> next_tokens;
+
+ int token_start = -1;
+
+ while (paren_layer > 0 && (iterator_.utf8_index() < text_end_)) {
+ c = iterator_.GetCurrentChar();
+
+ if (i18n_utils::IsAlphaNumeric(c)) {
+ if (token_start == -1) {
+ // Start tracking a token.
+ token_start = iterator_.utf8_index();
+ }
+ AdvanceCursor();
+ } else {
+ // Non-alphanumeric.
+ if (c == '\\') {
+ // A backslash, let's look at the next character.
+ UChar32 n = i18n_utils::GetUChar32At(text_.begin(), text_.length(),
+ iterator_.utf8_index() + 1);
+ if (i18n_utils::IsAlphaNumeric(n)) {
+ // Alphabetic, skip the slash and don't end the last token.
+ AdvanceCursor();
+ } else {
+ // Not alphabetic, save the last token if necessary.
+ if (token_start != -1) {
+ next_tokens.push_back(
+ Token(Token::Type::RFC822_COMMENT,
+ text_.substr(token_start,
+ iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+
+ // Skip the backslash.
+ AdvanceCursor();
+
+ if (n == ')' || n == '(' || n == '\\') {
+ // Skip these too if they're next.
+ AdvanceCursor();
+ }
+ }
+ } else {
+ // Not a backslash.
+ if (token_start != -1) {
+ next_tokens.push_back(
+ Token(Token::Type::RFC822_COMMENT,
+ text_.substr(token_start,
+ iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+
+ if (c == '(') {
+ paren_layer++;
+ } else if (c == ')') {
+ paren_layer--;
+ }
+ AdvanceCursor();
+ }
+ }
+ }
+
+ if (token_start != -1) {
+ // Ran past the end of text_ without getting the last token.
+
+ // substr returns "a view of the substring [pos, pos + rcount), where
+ // rcount is the smaller of count and size() - pos", so the count
+ // argument can be any value >= iterator_.utf8_index() - token_start.
+ // Hence we ignore the mutation warning.
+ next_tokens.push_back(Token(
+ Token::Type::RFC822_COMMENT,
+ text_.substr(token_start, iterator_.utf8_index() - token_start)));
+ }
+ return next_tokens;
+ }
+
+ // Returns tokens found in the address.
+ std::vector<Token> ConsumeAddress() {
+ // Skip the first <.
+ AdvanceCursor();
+
+ // Save the start position.
+ CharacterIterator address_start_iterator = iterator_;
+ std::vector<Token> next_tokens;
+
+ // at_sign starts as -1; if no @ is found it is later placed on the '<',
+ // so that the entire address is treated as the host part.
+ int at_sign = -1;
+ int address_end = -1;
+
+ UChar32 c = iterator_.GetCurrentChar();
+ // Quick scan for @ and > signs.
+ while (c != '>' && iterator_.utf8_index() < text_end_) {
+ AdvanceCursor();
+ c = iterator_.GetCurrentChar();
+ if (c == '@') {
+ at_sign = iterator_.utf8_index();
+ }
+ }
+
+ if (iterator_.utf8_index() <= address_start_iterator.utf8_index()) {
+ // There is nothing between the brackets, either we have "<" or "<>".
+ return next_tokens;
+ }
+
+ // Either we find a > or run to the end; either way this is the end of the
+ // address. The closing bracket will be handled by ConsumeUnquotedSection.
+ address_end = iterator_.utf8_index();
+
+ // Reset to the start.
+ iterator_ = address_start_iterator;
+
+ int address_start = address_start_iterator.utf8_index();
+
+ Token::Type type = Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL;
+
+ // Create a local address token.
+ if (at_sign != -1) {
+ next_tokens.push_back(
+ Token(Token::Type::RFC822_LOCAL_ADDRESS,
+ text_.substr(address_start, at_sign - address_start)));
+ } else {
+ // All the tokens in the address are host components.
+ type = Token::Type::RFC822_ADDRESS_COMPONENT_HOST;
+ // If no @ is found, treat the entire address as the host address.
+ at_sign = address_start - 1;
+ }
+
+ // The only case where we don't have a host address part is something like
+ // <localaddress@>. If there is no @, at_sign was set to address_start - 1
+ // above, so the host address is [address_start, address_end).
+ int host_address_start = at_sign + 1;
+ if (host_address_start < address_end) {
+ next_tokens.push_back(Token(
+ Token::Type::RFC822_HOST_ADDRESS,
+ text_.substr(host_address_start, address_end - host_address_start)));
+ }
+
+ next_tokens.push_back(
+ Token(Token::Type::RFC822_ADDRESS,
+ text_.substr(address_start, address_end - address_start)));
+
+ int token_start = -1;
+
+ while (iterator_.utf8_index() < address_end) {
+ c = iterator_.GetCurrentChar();
+
+ if (i18n_utils::IsAlphaNumeric(c)) {
+ if (token_start == -1) {
+ token_start = iterator_.utf8_index();
+ }
+ } else {
+ // Non-alphanumeric.
+ if (c == '\\') {
+ // A backslash, let's look at the next character.
+ CharacterIterator temp = iterator_;
+ temp.AdvanceToUtf32(iterator_.utf32_index() + 1);
+ UChar32 n = temp.GetCurrentChar();
+ if (!i18n_utils::IsAlphaNumeric(n)) {
+ // Not alphabetic, end the last token if necessary.
+ if (token_start != -1) {
+ next_tokens.push_back(Token(
+ type, text_.substr(token_start,
+ iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+ }
+ } else {
+ // Not backslash.
+ if (token_start != -1) {
+ next_tokens.push_back(Token(
+ type, text_.substr(token_start,
+ iterator_.utf8_index() - token_start)));
+ token_start = -1;
+ }
+ // Switch to host component tokens.
+ if (iterator_.utf8_index() == at_sign) {
+ type = Token::Type::RFC822_ADDRESS_COMPONENT_HOST;
+ }
+ }
+ }
+ AdvanceCursor();
+ }
+ if (token_start != -1) {
+ next_tokens.push_back(Token(
+ type,
+ text_.substr(token_start, iterator_.utf8_index() - token_start)));
+ }
+ // ConsumeUnquotedSection will handle the closing bracket > if there is one.
+ return next_tokens;
+ }
+
+ void AdvanceCursor() {
+ iterator_.AdvanceToUtf32(iterator_.utf32_index() + 1);
+ }
+
+ void AdvancePastWhitespace() {
+ while (i18n_utils::IsWhitespaceAt(text_, iterator_.utf8_index())) {
+ AdvanceCursor();
+ }
+ }
+
+ std::string_view text_;
+ CharacterIterator iterator_;
+ int text_end_;
+
+ // A temporary store of Tokens. As we advance through the provided string,
+ // we parse entire addresses at a time rather than one token at a time.
+ // However, since we call the tokenizer with Advance() alternating with
+ // GetTokens(), we need to store tokens for subsequent GetTokens() calls if
+ // Advance generates multiple tokens (it usually does). A vector is used as
+ // we need to iterate back and forth through tokens during snippeting. It is
+ // cleared by the destructor.
+ std::vector<Token> tokens_;
+ // Index to keep track of where we are in tokens_. This will always be set to
+ // point to an RFC822_TOKEN, or one past the end of the tokens_ vector. The
+ // only exception is before the first Advance call.
+ int token_index_;
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
+Rfc822Tokenizer::Tokenize(std::string_view text) const {
+ return std::make_unique<Rfc822TokenIterator>(text);
+}
+
+libtextclassifier3::StatusOr<std::vector<Token>> Rfc822Tokenizer::TokenizeAll(
+ std::string_view text) const {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+ Tokenize(text));
+ std::vector<Token> tokens;
+ while (iterator->Advance()) {
+ std::vector<Token> batch_tokens = iterator->GetTokens();
+ tokens.insert(tokens.end(), batch_tokens.begin(), batch_tokens.end());
+ }
+ return tokens;
+}
+
+} // namespace lib
+} // namespace icing
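
For orientation, a minimal usage sketch (not part of this patch) driving the tokenizer through the surface shown in the tests below, assuming only Tokenize(), Advance(), GetTokens(), and StatusOr::ValueOrDie():

#include <memory>
#include <string_view>

#include "icing/tokenization/rfc822-tokenizer.h"

// Sketch: visits one RFC822 address group per Advance() call.
void VisitRfc822Tokens(std::string_view text) {
  icing::lib::Rfc822Tokenizer tokenizer;
  std::unique_ptr<icing::lib::Tokenizer::Iterator> it =
      tokenizer.Tokenize(text).ValueOrDie();
  while (it->Advance()) {
    // GetTokens() returns the current RFC822_TOKEN plus all of its subtokens;
    // token.text is a std::string_view into the original input.
    for (const icing::lib::Token& token : it->GetTokens()) {
      (void)token;  // Inspect token.type and token.text here.
    }
  }
}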
diff --git a/icing/tokenization/rfc822-tokenizer.h b/icing/tokenization/rfc822-tokenizer.h
new file mode 100644
index 0000000..09e4624
--- /dev/null
+++ b/icing/tokenization/rfc822-tokenizer.h
@@ -0,0 +1,38 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_RFC822_TOKENIZER_H_
+#define ICING_TOKENIZATION_RFC822_TOKENIZER_H_
+
+#include <vector>
+
+#include "icing/tokenization/tokenizer.h"
+
+namespace icing {
+namespace lib {
+
+class Rfc822Tokenizer : public Tokenizer {
+ public:
+ libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
+ std::string_view text) const override;
+
+ libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
+ std::string_view text) const override;
+
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_RFC822_TOKENIZER_H_
diff --git a/icing/tokenization/rfc822-tokenizer_test.cc b/icing/tokenization/rfc822-tokenizer_test.cc
new file mode 100644
index 0000000..ee3a95d
--- /dev/null
+++ b/icing/tokenization/rfc822-tokenizer_test.cc
@@ -0,0 +1,992 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/rfc822-tokenizer.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+TEST(Rfc822TokenizerTest, StartingState) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "a@g.c";
+ auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
+
+ ASSERT_THAT(token_iterator->GetTokens(), IsEmpty());
+ ASSERT_TRUE(token_iterator->Advance());
+ ASSERT_THAT(token_iterator->GetTokens(), Not(IsEmpty()));
+}
+
+TEST(Rfc822TokenizerTest, EmptyMiddleToken) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string s("<alex>,,<tom>");
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<alex>"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "<tom>"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "tom"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "tom"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"))));
+}
+
+TEST(Rfc822TokenizerTest, Simple) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string_view s("<你alex@google.com>");
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<你alex@google.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "你alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "你alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "你alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, Small) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string s = "\"a\"";
+
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "a"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "a"))));
+
+ s = "\"a\", \"b\"";
+
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "a"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "a"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "b"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "b"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "b"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "b"))));
+
+ s = "(a)";
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::RFC822_TOKEN, "(a)"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "a"))));
+}
+
+TEST(Rfc822TokenizerTest, PB) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string_view s("peanut (comment) butter, <alex@google.com>");
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "<alex@google.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, NoBrackets) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string_view s("alex@google.com");
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
+}
+
+TEST(Rfc822TokenizerTest, TwoAddresses) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string_view s("<你alex@google.com>; <alexsav@gmail.com>");
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<你alex@google.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "你alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "你alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "你alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "<alexsav@gmail.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alexsav"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "gmail.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alexsav@gmail.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alexsav"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gmail"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, Comment) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string_view s("(a comment) <alex@google.com>");
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN,
+ "(a comment) <alex@google.com>"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "a"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, NameAndComment) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string_view s("\"a name\" also a name <alex@google.com>");
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN,
+ "\"a name\" also a name <alex@google.com>"),
+ EqualsToken(Token::Type::RFC822_NAME, "a"),
+ EqualsToken(Token::Type::RFC822_NAME, "name"),
+ EqualsToken(Token::Type::RFC822_NAME, "also"),
+ EqualsToken(Token::Type::RFC822_NAME, "a"),
+ EqualsToken(Token::Type::RFC822_NAME, "name"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+// Test from tokenizer_test.cc.
+TEST(Rfc822TokenizerTest, Rfc822SanityCheck) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string addr1("A name (A comment) <address@domain.com>");
+ std::string addr2(
+ "\"(Another name)\" (A different comment) "
+ "<bob-loblaw@foo.bar.com>");
+ std::string addr3("<no.at.sign.present>");
+ std::string addr4("<double@at@signs.present>");
+ std::string rfc822 = addr1 + ", " + addr2 + ", " + addr3 + ", " + addr4;
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(rfc822),
+ IsOkAndHolds(ElementsAre(
+
+ EqualsToken(Token::Type::RFC822_TOKEN, addr1),
+ EqualsToken(Token::Type::RFC822_NAME, "A"),
+ EqualsToken(Token::Type::RFC822_NAME, "name"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "A"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "domain.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "address@domain.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "domain"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, addr2),
+ EqualsToken(Token::Type::RFC822_NAME, "Another"),
+ EqualsToken(Token::Type::RFC822_NAME, "name"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "A"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "different"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "bob-loblaw"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo.bar.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "bob-loblaw@foo.bar.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "bob"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "loblaw"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "bar"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, addr3),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "no.at.sign.present"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "no.at.sign.present"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "no"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "at"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "sign"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "present"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, addr4),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "double@at"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "signs.present"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "double@at@signs.present"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "double"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "at"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "signs"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "present"))));
+}
+
+// Tests ported from the rfc822 converter.
+TEST(Rfc822TokenizerTest, SimpleRfcText) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string test_string =
+ "foo@google.com,bar@google.com,baz@google.com,foo+hello@google.com,baz@"
+ "corp.google.com";
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(test_string),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "foo@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "foo@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, "bar@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "bar"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "bar@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "bar"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, "baz@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "baz"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "baz@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "baz"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, "foo+hello@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "hello"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "foo+hello@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo+hello"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+
+ EqualsToken(Token::Type::RFC822_TOKEN, "baz@corp.google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "baz"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "corp"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "baz@corp.google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "baz"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "corp.google.com"))));
+}
+
+TEST(Rfc822TokenizerTest, ComplicatedRfcText) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string test_string =
+ R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>
+ <easy@google.com>)raw";
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(test_string),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(
+ Token::Type::RFC822_TOKEN,
+ R"raw("Weird, But&(Also)\\Valid" Name (!With, "an" \\odd\\ cmt too¡) <Foo B(a)r,Baz@g.co>)raw"),
+ EqualsToken(Token::Type::RFC822_NAME, "Weird"),
+ EqualsToken(Token::Type::RFC822_NAME, "But"),
+ EqualsToken(Token::Type::RFC822_NAME, "Also"),
+ EqualsToken(Token::Type::RFC822_NAME, "Valid"),
+ EqualsToken(Token::Type::RFC822_NAME, "Name"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "With"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "an"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "odd"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "cmt"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "too"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "Foo B(a)r,Baz"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.co"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "Foo B(a)r,Baz@g.co"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "Foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "B"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "r"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "Baz"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "co"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "<easy@google.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "easy"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "easy@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "easy"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, FromHtmlBugs) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ // This input used to cause an HTML parsing exception. We no longer do HTML
+ // parsing (b/8388100), so we just check that tokenizing does not crash and
+ // that the input is retained.
+
+ // http://b/8988210. Puts the crashing string "&\r" x 100 into the name and
+ // comment fields of the rfc822 token.
+
+ std::string s("\"");
+ for (int i = 0; i < 100; i++) {
+ s.append("&\r");
+ }
+ s.append("\" (");
+ for (int i = 0; i < 100; i++) {
+ s.append("&\r");
+ }
+ s.append(") <foo@google.com>");
+
+ // Tokenizing shouldn't alter the input; the RFC822_TOKEN retains it as-is.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, s),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "foo@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, EmptyComponentsTest) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(""),
+ IsOkAndHolds(testing::IsEmpty()));
+
+ // Name is considered the address if address is empty.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("name<>"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
+
+ // An empty name and address means that there is no address token.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("(a long comment with nothing else)"),
+ IsOkAndHolds(
+ ElementsAre(EqualsToken(Token::Type::RFC822_TOKEN,
+ "(a long comment with nothing else)"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "a"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "long"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "with"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "nothing"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "else"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("name ()"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
+
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(R"((comment) "")"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "(comment) \"\""),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
+}
+
+TEST(Rfc822TokenizerTest, NameTest) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ // Name spread between address or comment.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("peanut <address> butter"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "peanut <address> butter"),
+ EqualsToken(Token::Type::RFC822_NAME, "peanut"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
+ EqualsToken(Token::Type::RFC822_NAME, "butter"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("peanut (comment) butter"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"))));
+
+ // Dropping quotes when they're not needed.
+ std::string s = R"(peanut <address> "butter")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, s),
+ EqualsToken(Token::Type::RFC822_NAME, "peanut"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
+ EqualsToken(Token::Type::RFC822_NAME, "butter"))));
+
+ s = R"(peanut "butter")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(s),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "peanut"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "peanut"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "peanut"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "peanut"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "butter"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "butter"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "butter"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "butter"))));
+ // Adding quotes when they are needed.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("ple@se quote this <addr>"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "ple@se quote this <addr>"),
+ EqualsToken(Token::Type::RFC822_NAME, "ple"),
+ EqualsToken(Token::Type::RFC822_NAME, "se"),
+ EqualsToken(Token::Type::RFC822_NAME, "quote"),
+ EqualsToken(Token::Type::RFC822_NAME, "this"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));
+}
+
+TEST(Rfc822TokenizerTest, CommentEscapeTest) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ // '(', ')', '\\' chars should be escaped. All other escaped chars should be
+ // unescaped.
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(R"((co\)mm\\en\(t))"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"((co\)mm\\en\(t))"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "co"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "mm"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "en"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "t"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"((c\om\ment) name)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"(c\om\ment)"),
+ EqualsToken(Token::Type::RFC822_COMMENT, R"(c\om\ment)"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"((co(m\))ment) name)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"(co(m\))ment)"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "co"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "m"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "ment"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
+}
+
+TEST(Rfc822TokenizerTest, QuoteEscapeTest) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ // All names that include non-alphanumeric chars must be quoted and have '\\'
+ // and '"' chars escaped.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"(n\\a\me <addr>)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"(n\\a\me <addr>)"),
+ EqualsToken(Token::Type::RFC822_NAME, "n"),
+ EqualsToken(Token::Type::RFC822_NAME, "a"),
+ EqualsToken(Token::Type::RFC822_NAME, "me"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"))));
+
+ // Names that are within quotes should have all characters blindly unescaped.
+ // When a name is made into an address, it isn't re-escaped.
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"("n\\a\m\"e")"),
+ // <n\am"e>
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"(n\\a\m\"e)"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "n"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a\\m"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "e"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, R"(n\\a\m\"e)"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(n\\a\m\"e)"))));
+}
+
+TEST(Rfc822TokenizerTest, UnterminatedComponentTest) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll("name (comment"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"(half of "the name)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "half"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "half"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "half"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "half"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "of"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "of"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "of"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "of"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "the name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "the"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "the name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "the name"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"("name\)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"(name (comment\)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"(<addr> "name\)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<addr> \"name\\"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "addr"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "addr"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "addr"),
+ EqualsToken(Token::Type::RFC822_NAME, "name"))));
+
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(R"(name (comment\))"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "comment"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"))));
+}
+
+TEST(Rfc822TokenizerTest, Tokenize) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ std::string text =
+ R"raw("Berg" (home) <berg\@google.com>, tom\@google.com (work))raw";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN,
+ R"("Berg" (home) <berg\@google.com>)"),
+ EqualsToken(Token::Type::RFC822_NAME, "Berg"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "home"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "berg\\"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "berg\\@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "berg"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "tom\\@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "tom"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "tom\\@google.com"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "tom\\@google.com"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "work"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "work"))));
+
+ text = R"raw(Foo Bar (something) <foo\@google.com>, )raw"
+ R"raw(blah\@google.com (something))raw";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN,
+ "Foo Bar (something) <foo\\@google.com>"),
+ EqualsToken(Token::Type::RFC822_NAME, "Foo"),
+ EqualsToken(Token::Type::RFC822_NAME, "Bar"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "something"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "foo\\"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "foo\\@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "blah\\@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "blah"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "blah\\@google.com"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "blah\\@google.com"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "something"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "something"))));
+}
+
+TEST(Rfc822TokenizerTest, EdgeCases) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+
+ // Text that triggers the scenario where a non-alphabetic character followed
+ // by a \ followed by another non-alphabetic character ends an in-address token.
+ std::string text = R"raw(<be.\&rg@google.com>)raw";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN,
+ R"raw(<be.\&rg@google.com>)raw"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "be.\\&rg"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "be.\\&rg@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "be"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "rg"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+
+ // A \ followed by an alphabetic character shouldn't end the token.
+ text = "<a\\lex@google.com>";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<a\\lex@google.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a\\lex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "a\\lex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a\\lex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+
+ // \\ or \" in a quoted section.
+ text = R"("al\\ex@goo\"<idk>gle.com")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"(al\\ex@goo\"<idk>gle.com)"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "al"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "ex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "goo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "idk"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gle"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS,
+ R"(al\\ex@goo\"<idk>gle.com)"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "al\\\\ex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "goo\\\"<idk>gle.com"))));
+
+ text = "<alex@google.com";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<alex@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, NumberInAddress) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "<3alex@google.com>";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<3alex@google.com>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "3alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "3alex@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "3alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"))));
+}
+
+TEST(Rfc822TokenizerTest, DoubleQuoteDoubleSlash) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = R"("alex\"")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "alex"))));
+
+ text = R"("alex\\\a")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, R"(alex\\\a)"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, R"(alex\\\a)"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, R"(alex\\\a)"))));
+}
+
+TEST(Rfc822TokenizerTest, TwoEmails) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "tjbarron@google.com alexsav@google.com";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "tjbarron@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "tjbarron"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "tjbarron@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tjbarron"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "alexsav@google.com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alexsav"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alexsav@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alexsav"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
+}
+
+TEST(Rfc822TokenizerTest, BackSlashes) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = R"("\name")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "name"))));
+
+ text = R"("name@foo\@gmail")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "name@foo\\@gmail"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "name"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "gmail"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "name@foo\\@gmail"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "name"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo\\@gmail"))));
+}
+
+TEST(Rfc822TokenizerTest, BigWhitespace) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "\"quoted\" <address>";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, text),
+ EqualsToken(Token::Type::RFC822_NAME, "quoted"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"))));
+}
+
+TEST(Rfc822TokenizerTest, AtSignFirst) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "\"@foo\"";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "foo"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "foo"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "foo"))));
+}
+
+TEST(Rfc822TokenizerTest, SlashThenUnicode) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = R"("quoted\你cjk")";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "quoted\\你cjk"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST,
+ "quoted\\你cjk"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "quoted\\你cjk"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "quoted\\你cjk"))));
+}
+
+TEST(Rfc822TokenizerTest, AddressEmptyAddress) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "<address> <> Name";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, text),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "address"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "address"),
+ EqualsToken(Token::Type::RFC822_NAME, "Name"))));
+}
+
+TEST(Rfc822TokenizerTest, ProperComment) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "(comment)alex@google.com";
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "comment)alex@google.com"),
+ EqualsToken(Token::Type::RFC822_COMMENT, "comment"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "google"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "com"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@google.com"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "google.com"))));
+}
+
+TEST(Rfc822TokenizerTest, SmallNameToEmail) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "a@g.c,b@g.c";
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "a@g.c"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "a@g.c"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "b@g.c"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "b"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "b@g.c"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "b"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));
+
+ text = "a\\\\@g.c";
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "a\\\\@g.c"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "a"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "g"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_HOST, "c"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "a\\\\@g.c"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "a"),
+ EqualsToken(Token::Type::RFC822_HOST_ADDRESS, "g.c"))));
+}
+
+TEST(Rfc822TokenizerTest, AtSignLast) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string_view text("<alex@>, tim@");
+ EXPECT_THAT(
+ rfc822_tokenizer.TokenizeAll(text),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::RFC822_TOKEN, "<alex@>"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "alex"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "alex@"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "alex"),
+ EqualsToken(Token::Type::RFC822_TOKEN, "tim"),
+ EqualsToken(Token::Type::RFC822_ADDRESS_COMPONENT_LOCAL, "tim"),
+ EqualsToken(Token::Type::RFC822_ADDRESS, "tim"),
+ EqualsToken(Token::Type::RFC822_LOCAL_ADDRESS, "tim"))));
+}
+
+TEST(Rfc822TokenizerTest, Commas) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = ",,,,,,,,,,,,,,,,,,,,,,,,,,;";
+ EXPECT_THAT(rfc822_tokenizer.TokenizeAll(text), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "a@g.c,b@g.c";
+ auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
+ ASSERT_TRUE(token_iterator->Advance());
+ ASSERT_TRUE(token_iterator->Advance());
+
+ ASSERT_TRUE(token_iterator->ResetToTokenStartingAfter(-1));
+ EXPECT_THAT(token_iterator->GetTokens().at(0).text, "a@g.c");
+
+ ASSERT_TRUE(token_iterator->ResetToTokenStartingAfter(5));
+ EXPECT_THAT(token_iterator->GetTokens().at(0).text, "b@g.c");
+
+ ASSERT_FALSE(token_iterator->ResetToTokenStartingAfter(6));
+}
+
+TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
+ Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
+ std::string text = "a@g.c,b@g.c";
+ auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
+ token_iterator->Advance();
+
+ ASSERT_TRUE(token_iterator->ResetToTokenEndingBefore(5));
+ EXPECT_THAT(token_iterator->GetTokens().at(0).text, "a@g.c");
+
+ ASSERT_FALSE(token_iterator->ResetToTokenEndingBefore(4));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
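A minimal usage sketch of the iterator API the tests above exercise, assuming only the declarations visible in this change; the WalkRfc822Tokens helper is illustrative and not part of the patch.

#include <memory>
#include <string_view>

#include "icing/tokenization/rfc822-tokenizer.h"
#include "icing/tokenization/token.h"

namespace icing {
namespace lib {

// Sketch: walk every token the RFC822 tokenizer produces for `text`.
inline void WalkRfc822Tokens(std::string_view text) {
  Rfc822Tokenizer tokenizer;
  // Mirrors the tests above, which call ValueOrDie() on the returned StatusOr.
  auto token_iterator = tokenizer.Tokenize(text).ValueOrDie();
  while (token_iterator->Advance()) {
    // Each Advance() can surface several tokens at once (e.g. an RFC822_TOKEN
    // plus its RFC822_ADDRESS_COMPONENT_* breakdown), which is why GetTokens()
    // returns a vector rather than a single Token.
    for (const Token& token : token_iterator->GetTokens()) {
      (void)token;  // Inspect token.type and token.text here.
    }
  }
}

}  // namespace lib
}  // namespace icing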
diff --git a/icing/tokenization/simple/space-language-segmenter-factory.cc b/icing/tokenization/simple/space-language-segmenter-factory.cc
deleted file mode 100644
index 1cca603..0000000
--- a/icing/tokenization/simple/space-language-segmenter-factory.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/tokenization/simple/space-language-segmenter.h"
-#include "icing/util/logging.h"
-
-namespace icing {
-namespace lib {
-
-namespace language_segmenter_factory {
-
-// Creates a language segmenter with the given locale.
-//
-// Returns:
-// A LanguageSegmenter on success
-// INVALID_ARGUMENT if locale string is invalid
-//
-// TODO(samzheng): Figure out if we want to verify locale strings and notify
-// users. Right now illegal locale strings will be ignored by ICU. ICU
-// components will be created with its default locale.
-libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
- SegmenterOptions) {
- return std::make_unique<SpaceLanguageSegmenter>();
-}
-
-} // namespace language_segmenter_factory
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/tokenization/simple/space-language-segmenter.cc b/icing/tokenization/simple/space-language-segmenter.cc
deleted file mode 100644
index 7e301ec..0000000
--- a/icing/tokenization/simple/space-language-segmenter.cc
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/tokenization/simple/space-language-segmenter.h"
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-constexpr char kASCIISpace = ' ';
-} // namespace
-
-class SpaceLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
- public:
- SpaceLanguageSegmenterIterator(std::string_view text)
- : text_(text), term_start_index_(0), term_end_index_exclusive_(0) {}
-
- // Advances to the next term. Returns false if it has reached the end.
- bool Advance() override {
- if (term_end_index_exclusive_ >= text_.size() ||
- term_start_index_ >= text_.size()) {
- // Reached the end
- return false;
- }
-
- // Next term starts where we left off.
- term_start_index_ = term_end_index_exclusive_;
-
- // We know a term is at least one length, so we can +1 first.
- term_end_index_exclusive_++;
-
- // We alternate terms between space and non-space. Figure out what type of
- // term we're currently on so we know how to stop.
- bool is_space = text_[term_start_index_] == kASCIISpace;
-
- while (term_end_index_exclusive_ < text_.size()) {
- bool end_is_space = text_[term_end_index_exclusive_] == kASCIISpace;
- if (is_space != end_is_space) {
- // We finally see a different type of character, reached the end.
- break;
- }
- // We're still seeing the same types of characters (saw a space and
- // still seeing spaces, or saw a non-space and still seeing non-spaces).
- // Haven't reached the next term yet, keep advancing.
- term_end_index_exclusive_++;
- }
-
- return true;
- }
-
- // Returns the current term. It can be called only when Advance() returns
- // true.
- std::string_view GetTerm() const override {
- if (text_[term_start_index_] == kASCIISpace) {
- // Rule: multiple continuous whitespaces are treated as one.
- return std::string_view(&text_[term_start_index_], 1);
- }
- return text_.substr(term_start_index_,
- term_end_index_exclusive_ - term_start_index_);
- }
-
- libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter(
- int32_t offset) override {
- if (offset < 0) {
- // Start over from the beginning to find the first term.
- term_start_index_ = 0;
- term_end_index_exclusive_ = 0;
- } else {
- // Offset points to a term right now. Advance to get past the current
- // term.
- term_end_index_exclusive_ = offset;
- if (!Advance()) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "No term found in '%s' that starts after offset %d",
- std::string(text_).c_str(), offset));
- }
- }
-
- // Advance again so we can point to the next term.
- if (!Advance()) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "No term found in '%s' that starts after offset %d",
- std::string(text_).c_str(), offset));
- }
-
- return term_start_index_;
- }
-
- libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore(
- int32_t offset) override {
- if (offset <= 0 || offset > text_.size()) {
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "No term found in '%s' that ends before offset %d",
- std::string(text_).c_str(), offset));
- }
-
- if (offset == text_.size()) {
- // Special-case if the offset is the text length, this is the last term in
- // the text, which is also considered to be "ending before" the offset.
- term_end_index_exclusive_ = offset;
- ICING_ASSIGN_OR_RETURN(term_start_index_, GetTermStartingBefore(offset));
- return term_start_index_;
- }
-
- // Otherwise, this is just the end of the previous term and we still need to
- // find the start of the previous term.
- ICING_ASSIGN_OR_RETURN(term_end_index_exclusive_,
- GetTermStartingBefore(offset));
-
- if (term_end_index_exclusive_ == 0) {
- // The current term starts at the beginning of the underlying text_.
- // There is no term before this.
- return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
- "No term found in '%s' that ends before offset %d",
- std::string(text_).c_str(), offset));
- }
-
- // Reset ourselves to find the term before the end.
- ICING_ASSIGN_OR_RETURN(
- term_start_index_,
- GetTermStartingBefore(term_end_index_exclusive_ - 1));
- return term_start_index_;
- }
-
- libtextclassifier3::StatusOr<int32_t> ResetToStart() override {
- term_start_index_ = 0;
- term_end_index_exclusive_ = 0;
- if (!Advance()) {
- return absl_ports::NotFoundError("");
- }
- return term_start_index_;
- }
-
- private:
- // Return the start offset of the term starting right before the given offset.
- libtextclassifier3::StatusOr<int32_t> GetTermStartingBefore(int32_t offset) {
- bool is_space = text_[offset] == kASCIISpace;
-
- // Special-case that if offset was the text length, then we're already at
- // the "end" of our current term.
- if (offset == text_.size()) {
- is_space = text_[--offset] == kASCIISpace;
- }
-
- // While it's the same type of character (space vs non-space), we're in the
- // same term. So keep iterating backwards until we see a change.
- while (offset >= 0 && (text_[offset] == kASCIISpace) == is_space) {
- --offset;
- }
-
- // +1 is because offset was off-by-one to exit the while-loop.
- return ++offset;
- }
-
- // Text to be segmented
- std::string_view text_;
-
- // The start and end indices are used to track the positions of current
- // term.
- int term_start_index_;
- int term_end_index_exclusive_;
-};
-
-libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
-SpaceLanguageSegmenter::Segment(const std::string_view text) const {
- return std::make_unique<SpaceLanguageSegmenterIterator>(text);
-}
-
-libtextclassifier3::StatusOr<std::vector<std::string_view>>
-SpaceLanguageSegmenter::GetAllTerms(const std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- Segment(text));
- std::vector<std::string_view> terms;
- while (iterator->Advance()) {
- terms.push_back(iterator->GetTerm());
- }
- return terms;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/tokenization/simple/space-language-segmenter.h b/icing/tokenization/simple/space-language-segmenter.h
deleted file mode 100644
index de0a6d3..0000000
--- a/icing/tokenization/simple/space-language-segmenter.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TOKENIZATION_SIMPLE_SPACE_LANGUAGE_SEGMENTER_H_
-#define ICING_TOKENIZATION_SIMPLE_SPACE_LANGUAGE_SEGMENTER_H_
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <string_view>
-#include <vector>
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/tokenization/language-segmenter.h"
-
-namespace icing {
-namespace lib {
-
-// Simple segmenter that splits on spaces, regardless of language. Continuous
-// whitespaces will be returned as a single whitespace character.
-class SpaceLanguageSegmenter : public LanguageSegmenter {
- public:
- SpaceLanguageSegmenter() = default;
- SpaceLanguageSegmenter(const SpaceLanguageSegmenter&) = delete;
- SpaceLanguageSegmenter& operator=(const SpaceLanguageSegmenter&) = delete;
-
- // Segmentation is based purely on whitespace; does not take into account the
- // language of the text.
- //
- // Returns:
- // An iterator of terms on success
- libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
- Segment(std::string_view text) const override;
-
- // Does not take into account the language of the text.
- //
- // Returns:
- // A list of terms on success
- // INTERNAL_ERROR if any error occurs
- libtextclassifier3::StatusOr<std::vector<std::string_view>> GetAllTerms(
- std::string_view text) const override;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_TOKENIZATION_SIMPLE_SPACE_LANGUAGE_SEGMENTER_H_
diff --git a/icing/tokenization/simple/space-language-segmenter_test.cc b/icing/tokenization/simple/space-language-segmenter_test.cc
deleted file mode 100644
index 8ed38b2..0000000
--- a/icing/tokenization/simple/space-language-segmenter_test.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/tokenization/language-segmenter.h"
-
-namespace icing {
-namespace lib {
-namespace {
-
-using ::testing::ElementsAre;
-using ::testing::Eq;
-using ::testing::IsEmpty;
-
-TEST(SpaceLanguageSegmenterTest, EmptyText) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
- EXPECT_THAT(language_segmenter->GetAllTerms(""), IsOkAndHolds(IsEmpty()));
-}
-
-TEST(SpaceLanguageSegmenterTest, SimpleText) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
- EXPECT_THAT(language_segmenter->GetAllTerms("Hello World"),
- IsOkAndHolds(ElementsAre("Hello", " ", "World")));
-}
-
-TEST(SpaceLanguageSegmenterTest, Punctuation) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
-
- EXPECT_THAT(language_segmenter->GetAllTerms("Hello, World!!!"),
- IsOkAndHolds(ElementsAre("Hello,", " ", "World!!!")));
- EXPECT_THAT(language_segmenter->GetAllTerms("Open-source project"),
- IsOkAndHolds(ElementsAre("Open-source", " ", "project")));
- EXPECT_THAT(language_segmenter->GetAllTerms("100%"),
- IsOkAndHolds(ElementsAre("100%")));
- EXPECT_THAT(language_segmenter->GetAllTerms("(A&B)"),
- IsOkAndHolds(ElementsAre("(A&B)")));
-}
-
-TEST(SpaceLanguageSegmenterTest, Alphanumeric) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
-
- // Alphanumeric terms are allowed
- EXPECT_THAT(language_segmenter->GetAllTerms("Se7en A4 3a"),
- IsOkAndHolds(ElementsAre("Se7en", " ", "A4", " ", "3a")));
-}
-
-TEST(SpaceLanguageSegmenterTest, Number) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
-
- // Alphanumeric terms are allowed
- EXPECT_THAT(
- language_segmenter->GetAllTerms("3.141592653589793238462643383279"),
- IsOkAndHolds(ElementsAre("3.141592653589793238462643383279")));
-
- EXPECT_THAT(language_segmenter->GetAllTerms("3,456.789"),
- IsOkAndHolds(ElementsAre("3,456.789")));
-
- EXPECT_THAT(language_segmenter->GetAllTerms("-123"),
- IsOkAndHolds(ElementsAre("-123")));
-}
-
-TEST(SpaceLanguageSegmenterTest, ContinuousWhitespaces) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
-
- // Multiple continuous whitespaces are treated as one.
- const int kNumSeparators = 256;
- const std::string text_with_spaces =
- absl_ports::StrCat("Hello", std::string(kNumSeparators, ' '), "World");
- EXPECT_THAT(language_segmenter->GetAllTerms(text_with_spaces),
- IsOkAndHolds(ElementsAre("Hello", " ", "World")));
-}
-
-TEST(SpaceLanguageSegmenterTest, NotCopyStrings) {
- ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
- language_segmenter_factory::Create());
- // Validates that the input strings are not copied
- const std::string text = "Hello World";
- const char* word1_address = text.c_str();
- const char* word2_address = text.c_str() + 6;
- ICING_ASSERT_OK_AND_ASSIGN(std::vector<std::string_view> terms,
- language_segmenter->GetAllTerms(text));
- ASSERT_THAT(terms, ElementsAre("Hello", " ", "World"));
- const char* word1_result_address = terms.at(0).data();
- const char* word2_result_address = terms.at(2).data();
-
- // The underlying char* should be the same
- EXPECT_THAT(word1_address, Eq(word1_result_address));
- EXPECT_THAT(word2_address, Eq(word2_result_address));
-}
-
-} // namespace
-} // namespace lib
-} // namespace icing
diff --git a/icing/tokenization/token.h b/icing/tokenization/token.h
index 0bb3aaf..05d6fe4 100644
--- a/icing/tokenization/token.h
+++ b/icing/tokenization/token.h
@@ -20,16 +20,25 @@
namespace icing {
namespace lib {
-// TODO(samzheng) Add group id support if needed. Right now in raw query we
-// don't need group ids since all our query operators (OR, Exclusion, Property
-// Restriction) only apply to the token right after them (vs. applying to
-// multiple tokens after them). The "groups" of tokens can be easily recognized.
struct Token {
- enum Type {
+ enum class Type {
// Common types
REGULAR, // A token without special meanings, the value of it will be
// indexed or searched directly
+ VERBATIM, // A token that should be indexed and searched without any
+ // modifications to the raw text
+
+ // An RFC822 section with the content in RFC822_TOKEN tokenizes as follows:
+ RFC822_NAME, // "User", "Johnsson"
+ RFC822_COMMENT, // "A", "comment", "here"
+ RFC822_LOCAL_ADDRESS, // "user.name"
+ RFC822_HOST_ADDRESS, // "domain.name.com"
+ RFC822_ADDRESS, // "user.name@domain.name.com"
+ RFC822_ADDRESS_COMPONENT_LOCAL, // "user", "name"
+ RFC822_ADDRESS_COMPONENT_HOST, // "domain", "name", "com"
+ RFC822_TOKEN, // "User Johnsson (A comment) <user.name@domain.name.com>"
+
// Types only used in raw query
QUERY_OR, // Indicates OR logic between its left and right tokens
QUERY_EXCLUSION, // Indicates exclusion operation on next token
@@ -37,6 +46,20 @@ struct Token {
QUERY_LEFT_PARENTHESES, // Left parentheses
QUERY_RIGHT_PARENTHESES, // Right parentheses
+ // Types used in URL tokenization
+ URL_SCHEME, // "http", "https", "ftp", "content"
+ URL_USERNAME,
+ URL_PASSWORD,
+ URL_HOST_COMMON_PART, // Hosts are split into two types, common and
+ // significant. Common parts are e.g. "www", "ww2", ".com".
+ URL_HOST_SIGNIFICANT_PART,
+ URL_PORT,
+ URL_PATH_PART, // Tokenized path, e.g. /abc-d/e.fg -> [abc-d], [e.fg]
+ URL_QUERY, // After ?, before #, e.g. "param1=value-1&param2=value-2"
+ URL_REF, // Anything after #; may be arbitrary text
+ URL_SUFFIX,
+ URL_SUFFIX_INNERMOST,
+
// Indicates errors
INVALID,
};
@@ -46,10 +69,10 @@ struct Token {
: type(type_in), text(text_in) {}
// The type of token
- const Type type;
+ Type type;
// The content of token
- const std::string_view text;
+ std::string_view text;
};
} // namespace lib
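Since Token::Type is now a scoped enum and Token's members are no longer const, tokens are assignable and call sites name the types explicitly. A hedged sketch of a consumer follows; the IsRfc822Address helper is illustrative, not part of this change.

#include "icing/tokenization/token.h"

namespace icing {
namespace lib {

// Sketch: true for the address-level RFC822 token types defined above.
inline bool IsRfc822Address(const Token& token) {
  switch (token.type) {
    case Token::Type::RFC822_ADDRESS:
    case Token::Type::RFC822_LOCAL_ADDRESS:
    case Token::Type::RFC822_HOST_ADDRESS:
      return true;
    default:
      return false;
  }
}

}  // namespace lib
}  // namespace icing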
diff --git a/icing/tokenization/tokenizer-factory.cc b/icing/tokenization/tokenizer-factory.cc
index 9ebbce5..d120ac8 100644
--- a/icing/tokenization/tokenizer-factory.cc
+++ b/icing/tokenization/tokenizer-factory.cc
@@ -22,7 +22,14 @@
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/plain-tokenizer.h"
#include "icing/tokenization/raw-query-tokenizer.h"
+#include "icing/tokenization/rfc822-tokenizer.h"
#include "icing/tokenization/tokenizer.h"
+
+#ifdef ENABLE_URL_TOKENIZER
+#include "icing/tokenization/url-tokenizer.h"
+#endif // ENABLE_URL_TOKENIZER
+
+#include "icing/tokenization/verbatim-tokenizer.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -31,14 +38,24 @@ namespace lib {
namespace tokenizer_factory {
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
-CreateIndexingTokenizer(IndexingConfig::TokenizerType::Code type,
+CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
const LanguageSegmenter* lang_segmenter) {
ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
switch (type) {
- case IndexingConfig::TokenizerType::PLAIN:
+ case StringIndexingConfig::TokenizerType::PLAIN:
return std::make_unique<PlainTokenizer>(lang_segmenter);
- case IndexingConfig::TokenizerType::NONE:
+ case StringIndexingConfig::TokenizerType::VERBATIM:
+ return std::make_unique<VerbatimTokenizer>();
+ case StringIndexingConfig::TokenizerType::RFC822:
+ return std::make_unique<Rfc822Tokenizer>();
+// TODO (b/246964044): remove ifdef guard when url-tokenizer is ready for export
+// to Android.
+#ifdef ENABLE_URL_TOKENIZER
+ case StringIndexingConfig::TokenizerType::URL:
+ return std::make_unique<UrlTokenizer>();
+#endif // ENABLE_URL_TOKENIZER
+ case StringIndexingConfig::TokenizerType::NONE:
[[fallthrough]];
default:
// This should never happen.
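With the new cases wired in, selecting the RFC822 tokenizer through the factory looks roughly like this: a sketch that assumes a lang_segmenter already created via language_segmenter_factory, using the ICING_ASSIGN_OR_RETURN macro this file already relies on.

// Sketch only: pick the RFC822 tokenizer for an indexed string property.
ICING_ASSIGN_OR_RETURN(
    std::unique_ptr<Tokenizer> rfc822_tokenizer,
    tokenizer_factory::CreateIndexingTokenizer(
        StringIndexingConfig::TokenizerType::RFC822, lang_segmenter.get()));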
diff --git a/icing/tokenization/tokenizer-factory.h b/icing/tokenization/tokenizer-factory.h
index f81fd96..8b9226d 100644
--- a/icing/tokenization/tokenizer-factory.h
+++ b/icing/tokenization/tokenizer-factory.h
@@ -37,7 +37,7 @@ namespace tokenizer_factory {
// FAILED_PRECONDITION on any null pointer input
// INVALID_ARGUMENT if tokenizer type is invalid
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
-CreateIndexingTokenizer(IndexingConfig::TokenizerType::Code type,
+CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
const LanguageSegmenter* lang_segmenter);
// All the supported query tokenizer types
diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h
index 38c4745..fb7613f 100644
--- a/icing/tokenization/tokenizer.h
+++ b/icing/tokenization/tokenizer.h
@@ -18,9 +18,12 @@
#include <cstdint>
#include <memory>
#include <string_view>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/tokenization/token.h"
+#include "icing/util/character-iterator.h"
namespace icing {
namespace lib {
@@ -38,14 +41,6 @@ class Tokenizer {
public:
virtual ~Tokenizer() = default;
- enum Type {
- // Index tokenizers
- PLAIN, // Used to tokenize plain text input
-
- // Query tokenizers
- RAW_QUERY, // Used to tokenize raw queries
- };
-
// An iterator helping to get tokens.
// Example usage:
//
@@ -60,31 +55,48 @@ class Tokenizer {
// Advances to the next token. Returns false if it has reached the end.
virtual bool Advance() = 0;
- // Returns the current token. It can be called only when Advance() returns
- // true, otherwise an invalid token could be returned.
- virtual Token GetToken() const = 0;
+ // Returns the current token, possibly along with compound tokens. It should
+ // be called only when Advance() returns true; otherwise an empty Token
+ // vector may be returned.
+ virtual std::vector<Token> GetTokens() const = 0;
+
+ virtual libtextclassifier3::StatusOr<CharacterIterator>
+ CalculateTokenStart() {
+ return absl_ports::UnimplementedError(
+ "CalculateTokenStart is not implemented!");
+ }
+
+ virtual libtextclassifier3::StatusOr<CharacterIterator>
+ CalculateTokenEndExclusive() {
+ return absl_ports::UnimplementedError(
+ "CalculateTokenEndExclusive is not implemented!");
+ }
// Sets the tokenizer to point at the first token that *starts* *after*
// offset. Returns false if there are no valid tokens starting after
// offset.
// Ex.
// auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
- // iterator.ResetToTokenAfter(4);
+ // iterator.ResetToTokenStartingAfter(4);
// // The first full token starting after position 4 (the 'b' in "bar") is
// // "baz".
// PrintToken(iterator.GetToken()); // prints "baz"
- virtual bool ResetToTokenAfter(int32_t offset) { return false; }
+ virtual bool ResetToTokenStartingAfter(int32_t utf32_offset) {
+ return false;
+ }
// Sets the tokenizer to point at the first token that *ends* *before*
// offset. Returns false if there are no valid tokens ending
// before offset.
// Ex.
// auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
- // iterator.ResetToTokenBefore(4);
+ // iterator.ResetToTokenEndingBefore(4);
// // The first full token ending before position 4 (the 'b' in "bar") is
// // "foo".
// PrintToken(iterator.GetToken()); // prints "foo"
- virtual bool ResetToTokenBefore(int32_t offset) { return false; }
+ virtual bool ResetToTokenEndingBefore(int32_t utf32_offset) {
+ return false;
+ }
virtual bool ResetToStart() { return false; }
};
diff --git a/icing/tokenization/verbatim-tokenizer.cc b/icing/tokenization/verbatim-tokenizer.cc
new file mode 100644
index 0000000..9ca611d
--- /dev/null
+++ b/icing/tokenization/verbatim-tokenizer.cc
@@ -0,0 +1,144 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/verbatim-tokenizer.h"
+
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/util/character-iterator.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+class VerbatimTokenIterator : public Tokenizer::Iterator {
+ public:
+ explicit VerbatimTokenIterator(std::string_view text)
+ : term_(std::move(text)) {}
+
+ bool Advance() override {
+ if (term_.empty() || has_advanced_to_end_) {
+ return false;
+ }
+
+ has_advanced_to_end_ = true;
+ return true;
+ }
+
+ std::vector<Token> GetTokens() const override {
+ std::vector<Token> result;
+
+ if (!term_.empty() && has_advanced_to_end_) {
+ result.push_back(Token(Token::Type::VERBATIM, term_));
+ }
+
+ return result;
+ }
+
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart()
+ override {
+ if (term_.empty()) {
+ return absl_ports::AbortedError(
+ "Could not calculate start of empty token.");
+ }
+
+ return CharacterIterator(term_, 0, 0, 0);
+ }
+
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenEndExclusive()
+ override {
+ if (term_.empty()) {
+ return absl_ports::AbortedError(
+ "Could not calculate end of empty token.");
+ }
+
+ if (token_end_iterator_.utf8_index() >= 0) {
+ return token_end_iterator_;
+ }
+
+ bool moved_to_token_end = token_end_iterator_.MoveToUtf8(term_.length());
+ if (moved_to_token_end) {
+ return token_end_iterator_;
+ } else {
+ return absl_ports::AbortedError("Could not move to end of token.");
+ }
+ }
+
+ bool ResetToTokenStartingAfter(int32_t utf32_offset) override {
+    // We can only reset to the sole verbatim token, so the offset must be
+    // negative for that token to be considered as starting after it.
+ if (utf32_offset < 0) {
+ // Because we are now at the sole verbatim token, we should ensure we can
+ // no longer advance past it.
+ has_advanced_to_end_ = true;
+ return true;
+ }
+ return false;
+ }
+
+ bool ResetToTokenEndingBefore(int32_t utf32_offset) override {
+ // We can only reset to the sole verbatim token, so we must have an offset
+ // after the end of the token for the reset to be valid. This means the
+ // provided utf-32 offset must be equal to or greater than the utf-32 length
+ // of the token.
+ if (token_end_iterator_.utf8_index() < 0) {
+ // Moves one index past the end of the term.
+ bool moved_to_token_end = token_end_iterator_.MoveToUtf8(term_.length());
+ if (!moved_to_token_end) {
+ // We're unable to reset as we failed to move to the end of the term.
+ return false;
+ }
+ }
+
+ if (utf32_offset >= token_end_iterator_.utf32_index()) {
+ // Because we are now at the sole verbatim token, we should ensure we can
+ // no longer advance past it.
+ has_advanced_to_end_ = true;
+ return true;
+ }
+ return false;
+ }
+
+  bool ResetToStart() override {
+    // Position the iterator on the sole verbatim token so that GetTokens()
+    // is immediately valid, mirroring the other Reset methods.
+    has_advanced_to_end_ = true;
+    return true;
+  }
+
+ private:
+ std::string_view term_;
+ CharacterIterator token_end_iterator_ = CharacterIterator(term_, -1, -1, -1);
+  // Whether we have already advanced to the sole verbatim token.
+ bool has_advanced_to_end_ = false;
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
+VerbatimTokenizer::Tokenize(std::string_view text) const {
+ return std::make_unique<VerbatimTokenIterator>(text);
+}
+
+libtextclassifier3::StatusOr<std::vector<Token>> VerbatimTokenizer::TokenizeAll(
+ std::string_view text) const {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+ Tokenize(text));
+ std::vector<Token> tokens;
+ while (iterator->Advance()) {
+ std::vector<Token> batch = iterator->GetTokens();
+ tokens.insert(tokens.end(), batch.begin(), batch.end());
+ }
+ return tokens;
+}
+
+} // namespace lib
+} // namespace icing
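// A short usage sketch: the verbatim tokenizer emits at most one token
// spanning the entire input. Construction through tokenizer_factory is shown
// in verbatim-tokenizer_test.cc below; `tokenizer` here is assumed to be a
// VerbatimTokenizer.
//
//   ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens,
//                          tokenizer.TokenizeAll("Hello, world!"));
//   // tokens holds a single Token(Token::Type::VERBATIM, "Hello, world!").
//   ICING_ASSIGN_OR_RETURN(tokens, tokenizer.TokenizeAll(""));
//   // tokens is empty: Advance() returns false for empty input.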
diff --git a/icing/tokenization/verbatim-tokenizer.h b/icing/tokenization/verbatim-tokenizer.h
new file mode 100644
index 0000000..8404cf1
--- /dev/null
+++ b/icing/tokenization/verbatim-tokenizer.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_VERBATIM_H_
+#define ICING_TOKENIZATION_VERBATIM_H_
+
+#include <memory>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/tokenization/tokenizer.h"
+
+namespace icing {
+namespace lib {
+
+// Provides verbatim tokenization on input text
+class VerbatimTokenizer : public Tokenizer {
+ public:
+ libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
+ std::string_view text) const override;
+
+ libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
+ std::string_view text) const override;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_VERBATIM_H_
diff --git a/icing/tokenization/verbatim-tokenizer_test.cc b/icing/tokenization/verbatim-tokenizer_test.cc
new file mode 100644
index 0000000..bae69ff
--- /dev/null
+++ b/icing/tokenization/verbatim-tokenizer_test.cc
@@ -0,0 +1,210 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/portable/platform.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/token.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/util/character-iterator.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+class VerbatimTokenizerTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ jni_cache_ = GetTestJniCache();
+ language_segmenter_factory::SegmenterOptions options(ULOC_US,
+ jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+ }
+
+ std::unique_ptr<const JniCache> jni_cache_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+};
+
+TEST_F(VerbatimTokenizerTest, Empty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ EXPECT_THAT(verbatim_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(VerbatimTokenizerTest, Simple) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ EXPECT_THAT(
+ verbatim_tokenizer->TokenizeAll("foo bar"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::VERBATIM, "foo bar"))));
+}
+
+TEST_F(VerbatimTokenizerTest, Punctuation) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ EXPECT_THAT(verbatim_tokenizer->TokenizeAll("Hello, world!"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::Type::VERBATIM, "Hello, world!"))));
+}
+
+TEST_F(VerbatimTokenizerTest, NoTokensBeforeAdvancing) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+  // We should get no tokens if we call GetTokens() before advancing.
+ EXPECT_THAT(token_iterator->GetTokens(), IsEmpty());
+}
+
+TEST_F(VerbatimTokenizerTest, ResetToTokenEndingBefore) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+  // Reset to the verbatim token. We provide an offset of 13 as it is larger
+  // than the final index (12) of the verbatim token.
+ EXPECT_TRUE(token_iterator->ResetToTokenEndingBefore(13));
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::VERBATIM, "Hello, world!")));
+
+  // Ensure our cached character iterator properly maintains the end of the
+ // verbatim token.
+ EXPECT_TRUE(token_iterator->ResetToTokenEndingBefore(13));
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::VERBATIM, "Hello, world!")));
+
+ // We should not be able to reset with an offset before or within
+ // the verbatim token's utf-32 length.
+ EXPECT_FALSE(token_iterator->ResetToTokenEndingBefore(0));
+ EXPECT_FALSE(token_iterator->ResetToTokenEndingBefore(12));
+}
+
+TEST_F(VerbatimTokenizerTest, ResetToTokenStartingAfter) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+ // Get token without resetting
+ EXPECT_TRUE(token_iterator->Advance());
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::VERBATIM, "Hello, world!")));
+
+ // We expect a sole verbatim token, so it's not possible to reset after the
+ // start of the token.
+ EXPECT_FALSE(token_iterator->ResetToTokenStartingAfter(1));
+
+ // We expect to be reset to the sole verbatim token when the offset is
+ // negative.
+ EXPECT_TRUE(token_iterator->ResetToTokenStartingAfter(-1));
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::VERBATIM, "Hello, world!")));
+}
+
+TEST_F(VerbatimTokenizerTest, ResetToStart) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+ // Get token without resetting
+ EXPECT_TRUE(token_iterator->Advance());
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::VERBATIM, "Hello, world!")));
+
+ // Retrieve token again after resetting to start
+ EXPECT_TRUE(token_iterator->ResetToStart());
+ EXPECT_THAT(token_iterator->GetTokens(),
+ ElementsAre(EqualsToken(Token::Type::VERBATIM, "Hello, world!")));
+}
+
+TEST_F(VerbatimTokenizerTest, CalculateTokenStart) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+ ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator start_character_iterator,
+ token_iterator->CalculateTokenStart());
+
+ // We should retrieve the character 'H', the first character of the token.
+ EXPECT_THAT(start_character_iterator.GetCurrentChar(), Eq('H'));
+}
+
+TEST_F(VerbatimTokenizerTest, CalculateTokenEnd) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::VERBATIM,
+ language_segmenter_.get()));
+
+ constexpr std::string_view kText = "Hello, world!";
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+ ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator end_character_iterator,
+ token_iterator->CalculateTokenEndExclusive());
+
+  // We should retrieve the null character, as the returned character iterator
+  // will be set one past the end of the token.
+ EXPECT_THAT(end_character_iterator.GetCurrentChar(), Eq('\0'));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/tools/document-store-dump.cc b/icing/tools/document-store-dump.cc
deleted file mode 100644
index 45c9bf5..0000000
--- a/icing/tools/document-store-dump.cc
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/tools/document-store-dump.h"
-
-#include <cinttypes>
-
-#include "icing/absl_ports/str_cat.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/util/logging.h"
-
-namespace icing {
-namespace lib {
-namespace {
-
-void AppendDocumentProto(DocId document_id, const Document& doc,
- std::string* output) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- "Document {\n document_id: %d\n corpus_id: %d\n uri: "
- "'%s'\n score: %d\n created_timestamp_ms: %" PRIu64 "\n",
- static_cast<int>(document_id), doc.corpus_id(),
- doc.uri().c_str(), static_cast<int>(doc.score()),
- static_cast<int64_t>(doc.created_timestamp_ms())));
- for (const auto& section : doc.sections()) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " section {\n id: %d\n indexed_length: "
- "%d\n content: '%s'\n snippet: '%s'\n",
- static_cast<int>(section.id()),
- static_cast<int>(section.indexed_length()),
- section.content().c_str(), section.snippet().c_str()));
- for (int64_t extracted_number : section.extracted_numbers()) {
- absl_ports::StrAppend(output, IcingStringUtil::StringPrintf(
- " extracted_numbers: %" PRId64 "\n",
- extracted_number));
- }
- for (const std::string& annotation_token : section.annotation_tokens()) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(" annotation_tokens: '%s'\n",
- annotation_token.c_str()));
- }
- std::string indexed = (section.config().indexed()) ? "true" : "false";
- std::string index_prefixes =
- (section.config().index_prefixes()) ? "true" : "false";
- absl_ports::StrAppend(
- output,
- IcingStringUtil::StringPrintf(
- " config {\n name: '%s'\n indexed: %s\n "
- "tokenizer: %d\n weight: %d\n index_prefixes: %s\n "
- "subsection_separator: '%s'\n",
- section.config().name().c_str(), indexed.c_str(),
- section.config().tokenizer(),
- static_cast<int>(section.config().weight()), index_prefixes.c_str(),
- section.config().subsection_separator().c_str()));
- for (const auto& variant_generator :
- section.config().variant_generators()) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " variant_generators: %d\n", variant_generator));
- }
- absl_ports::StrAppend(
- output,
- IcingStringUtil::StringPrintf(
- " common_term_legacy_hit_score: %d\n "
- "rfc822_host_name_term_legacy_hit_score: %d\n "
- "semantic_property: '%s'\n universal_section_id: %d\n "
- "omnibox_section_type: %d\n st_section_type: %d\n }\n }\n",
- section.config().common_term_legacy_hit_score(),
- section.config().rfc822_host_name_term_legacy_hit_score(),
- section.config().semantic_property().c_str(),
- section.config().universal_section_id(),
- section.config().omnibox_section_type(),
- section.config().st_section_type()));
- }
- for (const auto& language : doc.languages()) {
- std::string used_classifier =
- (language.used_classifier()) ? "true" : "false";
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " languages {\n language: %d\n score: %d\n "
- "used_classifier: %s\n }\n",
- language.language(), static_cast<int>(language.score()),
- used_classifier.c_str()));
- }
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " ANNOTATIONS PRINTING NOT IMPLEMENTED YET IN ICING-TOOL\n"));
-}
-
-} // namespace
-
-std::string GetDocumentStoreDump(const DocumentStore& document_store) {
- std::string output;
- for (DocId document_id = 0; document_id < document_store.num_documents();
- document_id++) {
- Document doc;
- if (!document_store.ReadDocument(document_id, &doc)) {
- ICING_LOG(FATAL) << "Failed to read document";
- }
-
- AppendDocumentProto(document_id, doc, &output);
- }
- return output;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/tools/document-store-dump.h b/icing/tools/document-store-dump.h
deleted file mode 100644
index 023b301..0000000
--- a/icing/tools/document-store-dump.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
-#define ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
-
-#include <string>
-
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h"
-
-namespace icing {
-namespace lib {
-
-// Utility function for dumping the complete document store content.
-// This provides a human-readable representation of the document store, mainly
-// provided for easier understandability for developers.
-// The output of this class should only be available on cmdline-tool-level
-// (with root access), or unit tests. In other words it should not be possible
-// to trigger this on a release key device, for data protection reasons.
-std::string GetDocumentStoreDump(const DocumentStore& document_store);
-
-} // namespace lib
-} // namespace icing
-#endif // ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
diff --git a/icing/tools/icing-tool.cc b/icing/tools/icing-tool.cc
deleted file mode 100644
index 72a11e9..0000000
--- a/icing/tools/icing-tool.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Copyright 2012 Google Inc. All Rights Reserved.
-// Author: ulas@google.com (Ulas Kirazci)
-//
-// A tool to debug the native index.
-
-#include <getopt.h>
-#include <unistd.h>
-
-#include <string>
-
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/core/string-util.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/doc-property-filter.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/dynamic-trie.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/filesystem.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/mobstore.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/native-index-impl.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/tools/document-store-dump.h"
-#include "icing/util/logging.h"
-
-using std::vector;
-using ::wireless_android_play_playlog::icing::IndexRestorationStats;
-
-namespace icing {
-namespace lib {
-
-// 256KB for debugging.
-const size_t kMaxDocumentSizeForDebugging = 1u << 18;
-// Dump dynamic trie stats and contents.
-void ProcessDynamicTrie(const char* filename) {
- Filesystem filesystem;
- DynamicTrie trie(filename, DynamicTrie::RuntimeOptions(), &filesystem);
- if (!trie.Init()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Opening trie %s failed",
- filename);
- return;
- }
-
- std::string out;
- trie.GetDebugInfo(true, &out);
- printf("Stats:\n%s", out.c_str());
-
- std::ostringstream contents;
- vector<std::string> keys;
- trie.DumpTrie(&contents, &keys);
- printf("Contents:\n%s", contents.str().c_str());
-}
-
-NativeIndexImpl* MakeIndex(const char* root_dir) {
- NativeConfig native_config;
- native_config.set_max_document_size(kMaxDocumentSizeForDebugging);
- FlashIndexOptions flash_index_options(
- NativeIndexImpl::GetNativeIndexDir(root_dir));
- NativeIndexImpl* ni =
- new NativeIndexImpl(root_dir, native_config, flash_index_options);
- InitStatus init_status;
- if (!ni->Init(&init_status)) {
- ICING_LOG(FATAL) << "Failed to initialize legacy native index impl";
- }
-
- IndexRestorationStats unused;
- ni->RestoreIndex(IndexRequestSpec::default_instance(), &unused);
- return ni;
-}
-
-void RunQuery(NativeIndexImpl* ni, const std::string& query, int start,
- int num_results) {
- // Pull out corpusids and uris.
- QueryRequestSpec spec;
- spec.set_no_corpus_filter(true);
- spec.set_want_uris(true);
- spec.set_scoring_verbosity_level(1);
- spec.set_prefix_match(true);
-
- QueryResponse response;
- ni->ExecuteQuery(query, spec, 10000, start, num_results, &response);
-
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Query [%s] num results %u", query.c_str(), response.num_results());
-
- for (int i = 0, uri_offset = 0; i < response.num_results(); i++) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "%d: (cid=%u) uri %.*s", i, response.corpus_ids(i),
- response.uri_lengths(i), response.uri_buffer().data() + uri_offset);
- uri_offset += response.uri_lengths(i);
- }
-}
-
-void RunSuggest(NativeIndexImpl* ni, const std::string& prefix,
- int num_results) {
- SuggestionResponse results;
- ni->Suggest(prefix, num_results, vector<CorpusId>(), &results);
-
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Query [%s] num results %zu", prefix.c_str(),
- static_cast<size_t>(results.suggestions_size()));
-
- for (size_t i = 0; i < results.suggestions_size(); i++) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Sugg: [%s] display text [%s]", results.suggestions(i).query().c_str(),
- results.suggestions(i).display_text().c_str());
- }
-}
-
-int IcingTool(int argc, char** argv) {
- auto file_storage = CreatePosixFileStorage();
- enum Options {
- OPT_FILENAME,
- OPT_OP,
- OPT_QUERY,
- NUM_OPT,
- };
- static const option kOptions[NUM_OPT + 1] = {
- {"filename", 1, nullptr, 0},
- {"op", 1, nullptr, 0},
- {"query", 1, nullptr, 0},
- {nullptr, 0, nullptr, 0},
- };
- const char* opt_values[NUM_OPT];
- memset(opt_values, 0, sizeof(opt_values));
-
- while (true) {
- int opt_idx = -1;
- int ret = getopt_long(argc, argv, "", kOptions, &opt_idx);
- if (ret != 0) break;
-
- if (opt_idx >= 0 && opt_idx < NUM_OPT) {
- opt_values[opt_idx] = optarg;
- }
- }
-
- if (!opt_values[OPT_OP]) {
- ICING_LOG(ERROR) << "No op specified";
- return -1;
- }
-
- if (!opt_values[OPT_FILENAME]) {
- ICING_LOG(ERROR) << "No filename specified";
- return -1;
- }
- if (!strncmp(
- opt_values[OPT_FILENAME],
- "/data/data/com.google.android.gms/files/AppDataSearch",
- strlen("/data/data/com.google.android.gms/files/AppDataSearch"))) {
- ICING_LOG(ERROR)
- << "Should not read directly from the file in gmscore - "
- "icing-tool also commits writes as side-effects which corrupts "
- "the index on concurrent modification";
- return -1;
- }
-
- const char* op = opt_values[OPT_OP];
- DocumentStore::Options options(file_storage.get(),
- kMaxDocumentSizeForDebugging);
- if (!strcmp(op, "dyntrie")) {
- std::string full_file_path =
- absl_ports::StrCat(opt_values[OPT_FILENAME], "/idx.lexicon");
- ProcessDynamicTrie(full_file_path.c_str());
- } else if (!strcmp(op, "verify")) {
- std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
- ni->CheckVerify();
- } else if (!strcmp(op, "query")) {
- if (opt_values[OPT_QUERY] == nullptr) {
- ICING_LOG(FATAL) << "Opt value is null";
- }
-
- std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
- RunQuery(ni.get(), opt_values[OPT_QUERY], 0, 100);
- } else if (!strcmp(op, "suggest")) {
- if (opt_values[OPT_QUERY] == nullptr) {
- ICING_LOG(FATAL) << "Opt value is null";
- }
-
- std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
- RunSuggest(ni.get(), opt_values[OPT_QUERY], 100);
- } else if (!strcmp(op, "dump-all-docs")) {
- DocumentStore ds(opt_values[OPT_FILENAME], options);
- if (!ds.Init()) {
- ICING_LOG(FATAL) << "Legacy document store failed to initialize";
- }
-
- printf(
- "------ Document Store Dump Start ------\n"
- "%s\n"
- "------ Document Store Dump End ------\n",
- GetDocumentStoreDump(ds).c_str());
- } else if (!strcmp(op, "dump-uris")) {
- CorpusId corpus_id = kInvalidCorpusId;
- if (opt_values[OPT_QUERY]) {
- // Query is corpus id.
- corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT
- }
- DocumentStore ds(opt_values[OPT_FILENAME], options);
- if (!ds.Init()) {
- ICING_LOG(FATAL) << "Legacy document store failed to initialize";
- }
-
- DocPropertyFilter dpf;
- ds.AddDeletedTagFilter(&dpf);
-
- // Dump with format "<corpusid> <uri> <tagname>*".
- int filtered = 0;
- vector<std::string> tagnames;
- for (DocId document_id = 0; document_id < ds.num_documents();
- document_id++) {
- Document doc;
- if (!ds.ReadDocument(document_id, &doc)) {
- ICING_LOG(FATAL) << "Failed to read document.";
- }
-
- if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) {
- filtered++;
- continue;
- }
- if (dpf.Match(0, document_id)) {
- filtered++;
- continue;
- }
-
- tagnames.clear();
- ds.GetAllSetUserTagNames(document_id, &tagnames);
-
- printf("%d %s %s\n", doc.corpus_id(), doc.uri().c_str(),
- StringUtil::JoinStrings("/", tagnames).c_str());
- }
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Processed %u filtered %d", ds.num_documents(), filtered);
- } else if (!strcmp(op, "dump-docs")) {
- std::string out_filename = opt_values[OPT_FILENAME];
- out_filename.append("/docs-dump");
- CorpusId corpus_id = kInvalidCorpusId;
- if (opt_values[OPT_QUERY]) {
- // Query is corpus id.
- corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT
- out_filename.push_back('.');
- out_filename.append(opt_values[OPT_QUERY]);
- }
- DocumentStore ds(opt_values[OPT_FILENAME], options);
- if (!ds.Init()) {
- ICING_LOG(FATAL) << "Legacy document store failed to initialize";
- }
-
- DocPropertyFilter dpf;
- ds.AddDeletedTagFilter(&dpf);
-
- // Dump with format (<32-bit length><serialized content>)*.
- FILE* fp = fopen(out_filename.c_str(), "w");
- int filtered = 0;
- for (DocId document_id = 0; document_id < ds.num_documents();
- document_id++) {
- Document doc;
- if (!ds.ReadDocument(document_id, &doc)) {
- ICING_LOG(FATAL) << "Failed to read document.";
- }
-
- if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) {
- filtered++;
- continue;
- }
- if (dpf.Match(0, document_id)) {
- filtered++;
- continue;
- }
-
- std::string serialized = doc.SerializeAsString();
- uint32_t length = serialized.size();
- if (fwrite(&length, 1, sizeof(length), fp) != sizeof(length)) {
- ICING_LOG(FATAL) << "Failed to write length information to file";
- }
-
- if (fwrite(serialized.data(), 1, serialized.size(), fp) !=
- serialized.size()) {
- ICING_LOG(FATAL) << "Failed to write document to file";
- }
- }
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Processed %u filtered %d", ds.num_documents(), filtered);
- fclose(fp);
- } else {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unknown op %s", op);
- return -1;
- }
-
- return 0;
-}
-
-} // namespace lib
-} // namespace icing
-
-int main(int argc, char** argv) { return icing::lib::IcingTool(argc, argv); }
diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc
index 0bb8326..58d4956 100644
--- a/icing/transform/icu/icu-normalizer.cc
+++ b/icing/transform/icu/icu-normalizer.cc
@@ -29,6 +29,7 @@
#include "icing/util/status-macros.h"
#include "unicode/umachine.h"
#include "unicode/unorm2.h"
+#include "unicode/ustring.h"
#include "unicode/utrans.h"
namespace icing {
@@ -41,13 +42,15 @@ namespace {
// form decomposition) and NFKC (compatible normalization form composition)
// are applied as well as some other rules we need. More information at
// http://www.unicode.org/reports/tr15/
-// TODO(samzheng) Figure out if we need to support small hiragana to katakana
+//
+// Please note that the following rules don't support small hiragana to katakana
// transformation.
constexpr UChar kTransformRulesUtf16[] =
u"Lower; " // Lowercase
"Latin-ASCII; " // Map Latin characters to ASCII characters
"Hiragana-Katakana; " // Map hiragana to katakana
"[:Latin:] NFD; " // Decompose Latin letters
+ "[:Greek:] NFD; " // Decompose Greek letters
"[:Nonspacing Mark:] Remove; " // Remove accent / diacritic marks
"NFKC"; // Decompose and compose everything
@@ -74,7 +77,7 @@ bool DiacriticCharToAscii(const UNormalizer2* normalizer2, UChar32 uchar32_in,
}
// Maximum number of pieces a Unicode character can be decomposed into.
- // TODO(samzheng) figure out if this number is proper.
+ // TODO(tjbarron) figure out if this number is proper.
constexpr int kDecompositionBufferCapacity = 5;
// A buffer used to store Unicode decomposition mappings of only one
@@ -132,17 +135,16 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const {
ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance";
}
- // Checks if the first character is within ASCII range or can be transformed
- // into an ASCII char. Since the term is tokenized, we know that the whole
- // term can be transformed into ASCII if the first character can.
- UChar32 first_uchar32 =
- i18n_utils::GetUChar32At(term.data(), term.length(), 0);
- if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 &&
- DiacriticCharToAscii(normalizer2, first_uchar32, nullptr)) {
- // This is a faster method to normalize Latin terms.
- normalized_text = NormalizeLatin(normalizer2, term);
- } else {
- normalized_text = term_transformer_->Transform(term);
+ // Normalize the prefix that can be transformed into ASCII.
+ // This is a faster method to normalize Latin terms.
+ NormalizeLatinResult result = NormalizeLatin(normalizer2, term);
+ normalized_text = std::move(result.text);
+ if (result.end_pos < term.length()) {
+ // Some portion of term couldn't be normalized via NormalizeLatin. Use
+ // term_transformer to handle this portion.
+ std::string_view rest_term = term.substr(result.end_pos);
+ absl_ports::StrAppend(&normalized_text,
+ term_transformer_->Transform(rest_term));
}
if (normalized_text.length() > max_term_byte_size_) {
@@ -152,35 +154,32 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const {
return normalized_text;
}
-std::string IcuNormalizer::NormalizeLatin(const UNormalizer2* normalizer2,
- const std::string_view term) const {
- std::string result;
- result.reserve(term.length());
- for (int i = 0; i < term.length(); i++) {
- if (i18n_utils::IsAscii(term[i])) {
- result.push_back(std::tolower(term[i]));
- } else if (i18n_utils::IsLeadUtf8Byte(term[i])) {
- UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), i);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- ICING_LOG(WARNING) << "Unable to get uchar32 from " << term
- << " at position" << i;
- continue;
- }
- char ascii_char;
- if (DiacriticCharToAscii(normalizer2, uchar32, &ascii_char)) {
- result.push_back(std::tolower(ascii_char));
- } else {
- // We don't know how to transform / decompose this Unicode character, it
- // probably means that some other Unicode characters are mixed with
- // Latin characters. This shouldn't happen if input term is properly
- // tokenized. We handle it here in case there're something wrong with
- // the tokenizers.
- int utf8_length = i18n_utils::GetUtf8Length(uchar32);
- absl_ports::StrAppend(&result, term.substr(i, utf8_length));
- }
+IcuNormalizer::NormalizeLatinResult IcuNormalizer::NormalizeLatin(
+ const UNormalizer2* normalizer2, const std::string_view term) const {
+ NormalizeLatinResult result = {};
+ if (normalizer2 == nullptr) {
+ return result;
+ }
+ CharacterIterator char_itr(term);
+ result.text.reserve(term.length());
+ char ascii_char;
+ while (char_itr.utf8_index() < term.length()) {
+ UChar32 c = char_itr.GetCurrentChar();
+ if (i18n_utils::IsAscii(c)) {
+ result.text.push_back(std::tolower(c));
+ } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) {
+ result.text.push_back(std::tolower(ascii_char));
+ } else {
+      // We don't know how to transform / decompose this Unicode character;
+      // this probably means that some other Unicode characters are mixed with
+      // Latin characters. We return the partial result here and let the
+      // caller handle the rest.
+ result.end_pos = char_itr.utf8_index();
+ return result;
}
+ char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
}
-
+ result.end_pos = term.length();
return result;
}
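// A sketch (under the definitions above) of the NormalizeLatinResult
// contract: NormalizeLatin normalizes the maximal prefix of term that can be
// transformed into ASCII and reports where it stopped, so the caller can fall
// back to the transliterator for the remainder. The term below is an
// illustrative assumption, not a case from this change.
//
//   // "Zürich" normalizes to "zurich"; "で" cannot be transformed into
//   // ASCII, so end_pos is the byte offset of "で" within the term.
//   NormalizeLatinResult r = NormalizeLatin(normalizer2, "Zürichです");
//   // r.text == "zurich", r.end_pos == 7 ("Zürich" is 7 bytes in UTF-8)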
@@ -260,5 +259,114 @@ std::string IcuNormalizer::TermTransformer::Transform(
return std::move(utf8_term_or).ValueOrDie();
}
+bool IcuNormalizer::FindNormalizedLatinMatchEndPosition(
+ const UNormalizer2* normalizer2, std::string_view term,
+ CharacterIterator& char_itr, std::string_view normalized_term,
+ CharacterIterator& normalized_char_itr) const {
+ if (normalizer2 == nullptr) {
+ return false;
+ }
+ char ascii_char;
+ while (char_itr.utf8_index() < term.length() &&
+ normalized_char_itr.utf8_index() < normalized_term.length()) {
+ UChar32 c = char_itr.GetCurrentChar();
+ if (i18n_utils::IsAscii(c)) {
+ c = std::tolower(c);
+ } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) {
+ c = std::tolower(ascii_char);
+ } else {
+ return false;
+ }
+ UChar32 normalized_c = normalized_char_itr.GetCurrentChar();
+ if (c != normalized_c) {
+ return true;
+ }
+ char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
+ normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1);
+ }
+ return true;
+}
+
+CharacterIterator
+IcuNormalizer::TermTransformer::FindNormalizedNonLatinMatchEndPosition(
+ std::string_view term, CharacterIterator char_itr,
+ std::string_view normalized_term) const {
+ CharacterIterator normalized_char_itr(normalized_term);
+ UErrorCode status = U_ZERO_ERROR;
+
+ constexpr int kUtf16CharBufferLength = 6;
+ UChar c16[kUtf16CharBufferLength];
+ int32_t c16_length;
+ int32_t limit;
+
+ constexpr int kCharBufferLength = 3 * 4;
+ char normalized_buffer[kCharBufferLength];
+ int32_t c8_length;
+ while (char_itr.utf8_index() < term.length() &&
+ normalized_char_itr.utf8_index() < normalized_term.length()) {
+ UChar32 c = char_itr.GetCurrentChar();
+    int c_length = i18n_utils::GetUtf8Length(c);
+    u_strFromUTF8(c16, kUtf16CharBufferLength, &c16_length,
+                  term.data() + char_itr.utf8_index(),
+                  /*srcLength=*/c_length, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+
+ limit = c16_length;
+ utrans_transUChars(u_transliterator_, c16, &c16_length,
+ kUtf16CharBufferLength,
+ /*start=*/0, &limit, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+
+ u_strToUTF8(normalized_buffer, kCharBufferLength, &c8_length, c16,
+ c16_length, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+
+ for (int i = 0; i < c8_length; ++i) {
+ if (normalized_buffer[i] !=
+ normalized_term[normalized_char_itr.utf8_index() + i]) {
+ return char_itr;
+ }
+ }
+ normalized_char_itr.AdvanceToUtf8(normalized_char_itr.utf8_index() +
+ c8_length);
+ char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
+ }
+  if (U_FAILURE(status)) {
+    // Failed to transform; return the position reached so far.
+    ICING_LOG(WARNING) << "Failed to normalize UTF8 term: " << term;
+  }
+ }
+ return char_itr;
+}
+
+CharacterIterator IcuNormalizer::FindNormalizedMatchEndPosition(
+ std::string_view term, std::string_view normalized_term) const {
+ UErrorCode status = U_ZERO_ERROR;
+ // ICU manages the singleton instance
+ const UNormalizer2* normalizer2 = unorm2_getNFCInstance(&status);
+ if (U_FAILURE(status)) {
+ ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance";
+ }
+
+ CharacterIterator char_itr(term);
+ CharacterIterator normalized_char_itr(normalized_term);
+ if (FindNormalizedLatinMatchEndPosition(
+ normalizer2, term, char_itr, normalized_term, normalized_char_itr)) {
+ return char_itr;
+ }
+ // Some portion of term couldn't be normalized via
+ // FindNormalizedLatinMatchEndPosition. Use term_transformer to handle this
+ // portion.
+ std::string_view rest_normalized_term =
+ normalized_term.substr(normalized_char_itr.utf8_index());
+ return term_transformer_->FindNormalizedNonLatinMatchEndPosition(
+ term, char_itr, rest_normalized_term);
+}
+
} // namespace lib
} // namespace icing
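// Usage sketch for the new FindNormalizedMatchEndPosition entry point,
// mirroring the unit tests added below; `normalizer` is assumed to be an
// IcuNormalizer obtained from normalizer_factory::Create:
//
//   std::string term = "Zürich";
//   CharacterIterator end =
//       normalizer->FindNormalizedMatchEndPosition(term, "zur");
//   // The matching non-normalized prefix is term.substr(0, end.utf8_index()),
//   // i.e. "Zür" (4 bytes: 'Z' + 2-byte 'ü' + 'r').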
diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h
index f20a9fb..f6f2b78 100644
--- a/icing/transform/icu/icu-normalizer.h
+++ b/icing/transform/icu/icu-normalizer.h
@@ -21,6 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/character-iterator.h"
#include "unicode/unorm2.h"
#include "unicode/utrans.h"
@@ -32,7 +33,8 @@ namespace lib {
// 2. Transforms full-width Latin characters to ASCII characters if possible.
// 3. Transforms hiragana to katakana.
// 4. Removes accent / diacritic marks on Latin characters
-// 5. Normalized text must be less than or equal to max_term_byte_size,
+// 5. Removes accent / diacritic marks on Greek characters
+// 6. Normalized text must be less than or equal to max_term_byte_size,
// otherwise it will be truncated.
//
// There're some other rules from ICU not listed here, please see .cc file for
@@ -56,6 +58,17 @@ class IcuNormalizer : public Normalizer {
// result in the non-Latin characters not properly being normalized
std::string NormalizeTerm(std::string_view term) const override;
+ // Returns a CharacterIterator pointing to one past the end of the segment of
+  // term that (once normalized) matches normalized_term.
+ //
+ // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return
+ // CharacterIterator(u8:4, u16:4, u32:4).
+ //
+ // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return
+ // CharacterIterator(u8:0, u16:0, u32:0).
+ CharacterIterator FindNormalizedMatchEndPosition(
+ std::string_view term, std::string_view normalized_term) const override;
+
private:
// A handler class that helps manage the lifecycle of UTransliterator. It's
// used in IcuNormalizer to transform terms into the formats we need.
@@ -75,6 +88,12 @@ class IcuNormalizer : public Normalizer {
// Transforms the text based on our rules described at top of this file
std::string Transform(std::string_view term) const;
+  // Returns a CharacterIterator pointing to one past the end of the segment
+  // of a non-Latin term that (once normalized) matches normalized_term.
+ CharacterIterator FindNormalizedNonLatinMatchEndPosition(
+ std::string_view term, CharacterIterator char_itr,
+ std::string_view normalized_term) const;
+
private:
explicit TermTransformer(UTransliterator* u_transliterator);
@@ -83,14 +102,36 @@ class IcuNormalizer : public Normalizer {
UTransliterator* u_transliterator_;
};
+ struct NormalizeLatinResult {
+    // The normalized form of the longest prefix of term (possibly empty, or
+    // all of term) whose characters can be normalized into ASCII.
+    std::string text;
+    // The position of the first char within term that could not be normalized
+    // into an ASCII char, or term.length() if all chars were transformed.
+    size_t end_pos;
+ };
+
explicit IcuNormalizer(std::unique_ptr<TermTransformer> term_transformer,
int max_term_byte_size);
// Helper method to normalize Latin terms only. Rules applied:
// 1. Uppercase to lowercase
// 2. Remove diacritic (accent) marks
- std::string NormalizeLatin(const UNormalizer2* normalizer2,
- std::string_view term) const;
+ NormalizeLatinResult NormalizeLatin(const UNormalizer2* normalizer2,
+ std::string_view term) const;
+
+  // Sets char_itr and normalized_char_itr to point to one past the end of
+  // the matching segments of term and normalized_term that can be normalized
+  // into ASCII, and returns true.
+  //
+  // The method stops and returns false at the first position where the
+  // character under char_itr cannot be normalized into ASCII, so that
+  // term_transformer can handle the remaining portion.
+ bool FindNormalizedLatinMatchEndPosition(
+ const UNormalizer2* normalizer2, std::string_view term,
+ CharacterIterator& char_itr, std::string_view normalized_term,
+ CharacterIterator& normalized_char_itr) const;
// Used to transform terms into their normalized forms.
std::unique_ptr<TermTransformer> term_transformer_;
diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc
index b037538..89d5f1e 100644
--- a/icing/transform/icu/icu-normalizer_benchmark.cc
+++ b/icing/transform/icu/icu-normalizer_benchmark.cc
@@ -14,8 +14,8 @@
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
@@ -25,7 +25,7 @@
// //icing/transform/icu:icu-normalizer_benchmark
//
// $ blaze-bin/icing/transform/icu/icu-normalizer_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// Make target //icing/transform:normalizer depend on
@@ -39,8 +39,8 @@
// blaze-bin/icing/transform/icu/icu-normalizer_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmarks=all
-// --adb
+// $ adb shell /data/local/tmp/icu-normalizer_benchmark
+// --benchmark_filter=all --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
// the benchmark will set up data files accordingly.
@@ -61,7 +61,6 @@ void BM_NormalizeUppercase(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string(state.range(0), 'A');
@@ -95,7 +94,6 @@ void BM_NormalizeAccent(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string;
@@ -123,7 +121,7 @@ BENCHMARK(BM_NormalizeAccent)
->Arg(2048000)
->Arg(4096000);
-void BM_NormalizeHiragana(benchmark::State& state) {
+void BM_NormalizeGreekAccent(benchmark::State& state) {
bool run_via_adb = absl::GetFlag(FLAGS_adb);
if (!run_via_adb) {
ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
@@ -133,7 +131,43 @@ void BM_NormalizeHiragana(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Normalizer> normalizer,
normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("άὰᾶἀἄ");
+ }
+
+ for (auto _ : state) {
+ normalizer->NormalizeTerm(input_string);
+ }
+}
+BENCHMARK(BM_NormalizeGreekAccent)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_NormalizeHiragana(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
/*max_term_byte_size=*/std::numeric_limits<int>::max()));
std::string input_string;
@@ -161,6 +195,121 @@ BENCHMARK(BM_NormalizeHiragana)
->Arg(2048000)
->Arg(4096000);
+void BM_UppercaseSubTokenLength(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string(state.range(0), 'A');
+ std::string normalized_input_string(state.range(0), 'a');
+ for (auto _ : state) {
+ normalizer->FindNormalizedMatchEndPosition(input_string,
+ normalized_input_string);
+ }
+}
+BENCHMARK(BM_UppercaseSubTokenLength)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_AccentSubTokenLength(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ std::string normalized_input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("àáâãā");
+ normalized_input_string.append("aaaaa");
+ }
+
+ for (auto _ : state) {
+ normalizer->FindNormalizedMatchEndPosition(input_string,
+ normalized_input_string);
+ }
+}
+BENCHMARK(BM_AccentSubTokenLength)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_HiraganaSubTokenLength(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ std::string normalized_input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("あいうえお");
+ normalized_input_string.append("アイウエオ");
+ }
+
+ for (auto _ : state) {
+ normalizer->FindNormalizedMatchEndPosition(input_string,
+ normalized_input_string);
+ }
+}
+BENCHMARK(BM_HiraganaSubTokenLength)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
} // namespace
} // namespace lib
diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc
index 83fa972..0df23fc 100644
--- a/icing/transform/icu/icu-normalizer_test.cc
+++ b/icing/transform/icu/icu-normalizer_test.cc
@@ -16,8 +16,8 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/icu-i18n-test-utils.h"
#include "icing/testing/test-data.h"
#include "icing/transform/normalizer-factory.h"
@@ -83,14 +83,12 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) {
Eq("eeeeeeeeeeeeeeeeeeeeeeeeeee"));
EXPECT_THAT(normalizer_->NormalizeTerm("Ḟḟ"), Eq("ff"));
EXPECT_THAT(normalizer_->NormalizeTerm("ĜĞĠĢḠḡĝğġģ"), Eq("gggggggggg"));
- EXPECT_THAT(normalizer_->NormalizeTerm("ĤḢḤḦḨḪḣḥḧḩḫĥẖ"),
- Eq("hhhhhhhhhhhhh"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ĤḢḤḦḨḪḣḥḧḩḫĥẖ"), Eq("hhhhhhhhhhhhh"));
EXPECT_THAT(normalizer_->NormalizeTerm("ÌÍÎÏĨĪĬḬḭḯìíîïĩīĭ"),
Eq("iiiiiiiiiiiiiiiii"));
EXPECT_THAT(normalizer_->NormalizeTerm("Ĵĵ"), Eq("jj"));
EXPECT_THAT(normalizer_->NormalizeTerm("ĶḰḲḴḵḱḳķ"), Eq("kkkkkkkk"));
- EXPECT_THAT(normalizer_->NormalizeTerm("ĹĻĽḶḸḼḷḹḻḽĺļľ"),
- Eq("lllllllllllll"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ĹĻĽḶḸḼḷḹḻḽĺļľ"), Eq("lllllllllllll"));
EXPECT_THAT(normalizer_->NormalizeTerm("ḾṀṂḿṁṃ"), Eq("mmmmmm"));
EXPECT_THAT(normalizer_->NormalizeTerm("ÑŃŅŇṄṆṈṊṅṇṉṋñńņň"),
Eq("nnnnnnnnnnnnnnnn"));
@@ -109,23 +107,42 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) {
EXPECT_THAT(normalizer_->NormalizeTerm("ŴẀẂẄẆẈẁẃẅẇẉŵ"), Eq("wwwwwwwwwwww"));
EXPECT_THAT(normalizer_->NormalizeTerm("ẊẌẋẍ"), Eq("xxxx"));
EXPECT_THAT(normalizer_->NormalizeTerm("ÝŶŸẎẏŷýÿ"), Eq("yyyyyyyy"));
- EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"),
- Eq("zzzzzzzzzzzz"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"), Eq("zzzzzzzzzzzz"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Barış"), Eq("baris"));
+}
+
+TEST_F(IcuNormalizerTest, GreekLetterRemoveAccent) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("kαλημέρα"), Eq("kαλημερα"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("εγγραφή"), Eq("εγγραφη"));
+ EXPECT_THAT(normalizer_->NormalizeTerm(
+ "ἈἉἊἋἌἍἎἏᾈᾉᾊᾋᾌᾍᾎᾏᾸᾹᾺΆᾼἀἁἂἃἄἅἆἇὰάᾀᾁᾂᾃᾄᾅᾆᾇᾰᾱᾲᾳᾴᾶᾷ"),
+ Eq("αααααααααααααααααααααααααααααααααααααααααααααα"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ἘἙἚἛἜἝῈΈἐἑἒἓἔἕὲέ"),
+ Eq("εεεεεεεεεεεεεεεε"));
+ EXPECT_THAT(
+ normalizer_->NormalizeTerm("ἨἩἪἫἬἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟῊΉῌἠἡἢἣἤἥἦἧὴήᾐᾑᾒᾓᾔᾕᾖᾗῂῃῄῆῇ"),
+ Eq("ηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηηη"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ἸἹἺἻἼἽἾἿῘῙῚΊἰἱἲἳἴἵἶἷὶίῐῑῒΐῖῗ"),
+ Eq("ιιιιιιιιιιιιιιιιιιιιιιιιιιιι"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ὈὉὊὋὌὍῸΌὀὁὂὃὄὅὸό"),
+ Eq("οοοοοοοοοοοοοοοο"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ὙὛὝὟῨῩῪΎὐὑὒὓὔὕὖὗὺύῠῡῢΰῦῧ"),
+ Eq("υυυυυυυυυυυυυυυυυυυυυυυυ"));
+ EXPECT_THAT(
+ normalizer_->NormalizeTerm("ὨὩὪὫὬὭὮὯᾨᾩᾪᾫᾬᾭᾮᾯῺΏῼὠὡὢὣὤὥὦὧὼώᾠᾡᾢᾣᾤᾥᾦᾧῲῳῴῶῷ"),
+ Eq("ωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωωω"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Ῥῤῥ"), Eq("ρρρ"));
}
// Accent / diacritic marks won't be removed in non-latin chars, e.g. in
-// Japanese and Greek
+// Japanese
TEST_F(IcuNormalizerTest, NonLatinLetterNotRemoveAccent) {
// Katakana
EXPECT_THAT(normalizer_->NormalizeTerm("ダヂヅデド"), Eq("ダヂヅデド"));
- // Greek
- EXPECT_THAT(normalizer_->NormalizeTerm("kαλημέρα"), Eq("kαλημέρα"));
- EXPECT_THAT(normalizer_->NormalizeTerm("εγγραφή"), Eq("εγγραφή"));
// Our current ICU rules can't handle Hebrew properly, e.g. the accents in
// "אָלֶף־בֵּית עִבְרִי"
// will be removed.
- // TODO (samzheng): figure out how we should handle Hebrew.
}
TEST_F(IcuNormalizerTest, FullWidthCharsToASCII) {
@@ -232,6 +249,158 @@ TEST_F(IcuNormalizerTest, Truncate) {
}
}
+TEST_F(IcuNormalizerTest, PrefixMatchLength) {
+  // Verify that FindNormalizedMatchEndPosition will properly find the length
+  // of the prefix match when given a non-normalized term and a normalized
+  // term that is a prefix of the non-normalized one.
+ ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ // Upper to lower
+ std::string term = "MDI";
+ CharacterIterator match_end =
+ normalizer->FindNormalizedMatchEndPosition(term, "md");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("MD"));
+
+ term = "Icing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "icin");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Icin"));
+
+ // Full-width
+ term = "525600";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "525");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525"));
+
+ term = "FULLWIDTH";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "full");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL"));
+
+ // Hiragana to Katakana
+ term = "あいうえお";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい"));
+
+ term = "かきくけこ";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "カ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か"));
+
+ // Latin accents
+ term = "Zürich";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "zur");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür"));
+
+ term = "après-midi";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "apre");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè"));
+
+ term = "Buenos días";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí"));
+
+ term = "BarışIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "baris");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
+
+ term = "ÀĄḁáIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "aaaa");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ÀĄḁá"));
+
+ // Greek accents
+ term = "άνθρωπος";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ανθ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("άνθ"));
+
+ term = "καλημέρα";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "καλημε");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("καλημέ"));
+
+ term = "όχι";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "οχ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("όχ"));
+
+ term = "πότε";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ποτ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("πότ"));
+
+ term = "ἈἉἊἋIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "αααα");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ἈἉἊἋ"));
+}
+
+TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
+ // Verify that FindNormalizedMatchEndPosition will properly find the length of
+ // the prefix match when given a non-normalized term and a normalized term
+ // that share a common prefix.
+ ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ // Upper to lower
+ std::string term = "MDI";
+ CharacterIterator match_end =
+ normalizer->FindNormalizedMatchEndPosition(term, "mgm");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("M"));
+
+ term = "Icing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "icky");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Ic"));
+
+ // Full-width
+ term = "525600";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "525788");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525"));
+
+ term = "FULLWIDTH";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "fully");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL"));
+
+ // Hiragana to Katakana
+ term = "あいうえお";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイエオ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい"));
+
+ term = "かきくけこ";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "カケコ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か"));
+
+ // Latin accents
+ term = "Zürich";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "zurg");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür"));
+
+ term = "après-midi";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "apreciate");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè"));
+
+ term = "días";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día"));
+
+ term = "BarışIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "barismdi");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
+
+ // Greek accents
+ term = "άνθρωπος";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ανθν");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("άνθ"));
+
+ term = "καλημέρα";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "καλημεος");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("καλημέ"));
+
+ term = "όχι";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "οχκα");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("όχ"));
+
+ term = "πότε";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ποτρα");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("πότ"));
+
+ term = "ἈἉἊἋIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "ααααmdi");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ἈἉἊἋ"));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/transform/map/map-normalizer.cc b/icing/transform/map/map-normalizer.cc
index c888551..61fce65 100644
--- a/icing/transform/map/map-normalizer.cc
+++ b/icing/transform/map/map-normalizer.cc
@@ -14,8 +14,7 @@
#include "icing/transform/map/map-normalizer.h"
-#include <ctype.h>
-
+#include <cctype>
#include <string>
#include <string_view>
#include <unordered_map>
@@ -23,6 +22,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/transform/map/normalization-map.h"
+#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
#include "icing/util/logging.h"
#include "unicode/utypes.h"
@@ -30,48 +30,70 @@
namespace icing {
namespace lib {
+namespace {
+
+UChar32 NormalizeChar(UChar32 c) {
+ if (i18n_utils::GetUtf16Length(c) > 1) {
+ // All the characters we need to normalize can be encoded into a
+ // single char16_t. If this character needs more than 1 char16_t code
+ // unit, we can skip normalization and return it unchanged.
+ return c;
+ }
+
+ // The original character can be encoded into a single char16_t.
+ const std::unordered_map<char16_t, char16_t>* normalization_map =
+ GetNormalizationMap();
+ if (normalization_map == nullptr) {
+ // Normalization map couldn't be properly initialized; return the
+ // original character unchanged.
+ ICING_LOG(WARNING) << "Unable to get a valid pointer to normalization map!";
+ return c;
+ }
+ auto iterator = normalization_map->find(static_cast<char16_t>(c));
+ if (iterator == normalization_map->end()) {
+ // Normalization mapping not found; return the original character.
+ return c;
+ }
+
+ // Found a normalization mapping. The normalized character (stored in a
+ // char16_t) can have 1 or 2 bytes.
+ if (i18n_utils::IsAscii(iterator->second)) {
+ // The normalized character has 1 byte. It may be an upper-case char.
+ // Lower-case it before returning it.
+ return std::tolower(static_cast<char>(iterator->second));
+ } else {
+ return iterator->second;
+ }
+}
+
+} // namespace
+
std::string MapNormalizer::NormalizeTerm(std::string_view term) const {
std::string normalized_text;
normalized_text.reserve(term.length());
- for (int i = 0; i < term.length(); ++i) {
- if (i18n_utils::IsAscii(term[i])) {
- // The original character has 1 byte.
- normalized_text.push_back(std::tolower(term[i]));
- } else if (i18n_utils::IsLeadUtf8Byte(term[i])) {
- UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), i);
+ int current_pos = 0;
+ while (current_pos < term.length()) {
+ if (i18n_utils::IsAscii(term[current_pos])) {
+ normalized_text.push_back(std::tolower(term[current_pos]));
+ ++current_pos;
+ } else {
+ UChar32 uchar32 =
+ i18n_utils::GetUChar32At(term.data(), term.length(), current_pos);
if (uchar32 == i18n_utils::kInvalidUChar32) {
ICING_LOG(WARNING) << "Unable to get uchar32 from " << term
- << " at position" << i;
+ << " at position" << current_pos;
+ ++current_pos;
continue;
}
- int utf8_length = i18n_utils::GetUtf8Length(uchar32);
- if (i18n_utils::GetUtf16Length(uchar32) > 1) {
- // All the characters we need to normalize can be encoded into a
- // single char16_t. If this character needs more than 1 char16_t code
- // unit, we can skip normalization and append it directly.
- absl_ports::StrAppend(&normalized_text, term.substr(i, utf8_length));
- continue;
- }
- // The original character can be encoded into a single char16_t.
- const std::unordered_map<char16_t, char16_t>& normalization_map =
- GetNormalizationMap();
- auto iterator = normalization_map.find(static_cast<char16_t>(uchar32));
- if (iterator != normalization_map.end()) {
- // Found a normalization mapping. The normalized character (stored in a
- // char16_t) can have 1 or 2 bytes.
- if (i18n_utils::IsAscii(iterator->second)) {
- // The normalized character has 1 byte.
- normalized_text.push_back(
- std::tolower(static_cast<char>(iterator->second)));
- } else {
- // The normalized character has 2 bytes.
- i18n_utils::AppendUchar32ToUtf8(&normalized_text, iterator->second);
- }
+ UChar32 normalized_char32 = NormalizeChar(uchar32);
+ if (i18n_utils::IsAscii(normalized_char32)) {
+ normalized_text.push_back(normalized_char32);
} else {
- // Normalization mapping not found, append the original character.
- absl_ports::StrAppend(&normalized_text, term.substr(i, utf8_length));
+ // The normalized character is non-ASCII; append its UTF-8 encoding.
+ i18n_utils::AppendUchar32ToUtf8(&normalized_text, normalized_char32);
}
+ current_pos += i18n_utils::GetUtf8Length(uchar32);
}
}
@@ -82,5 +104,27 @@ std::string MapNormalizer::NormalizeTerm(std::string_view term) const {
return normalized_text;
}
+CharacterIterator MapNormalizer::FindNormalizedMatchEndPosition(
+ std::string_view term, std::string_view normalized_term) const {
+ CharacterIterator char_itr(term);
+ CharacterIterator normalized_char_itr(normalized_term);
+ while (char_itr.utf8_index() < term.length() &&
+ normalized_char_itr.utf8_index() < normalized_term.length()) {
+ UChar32 c = char_itr.GetCurrentChar();
+ if (i18n_utils::IsAscii(c)) {
+ c = std::tolower(c);
+ } else {
+ c = NormalizeChar(c);
+ }
+ UChar32 normalized_c = normalized_char_itr.GetCurrentChar();
+ if (c != normalized_c) {
+ return char_itr;
+ }
+ char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
+ normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1);
+ }
+ return char_itr;
+}
+
} // namespace lib
} // namespace icing
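The matching loop above compares term against normalized_term one code point
at a time and stops at the first divergence. A hedged walk-through of a case
taken from the tests in this patch (creation through normalizer_factory is
assumed):

  // term "días" vs. normalized "diamond": 'd', 'í'->'i', and 'a' match;
  // 's' vs. 'm' stops the loop, leaving the iterator on 's'.
  ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
                                 /*max_term_byte_size=*/1000));
  CharacterIterator end =
      normalizer->FindNormalizedMatchEndPosition("días", "diamond");
  // 'í' occupies two UTF-8 bytes but one UTF-32 unit, so the indices differ:
  assert(end.utf8_index() == 4);   // matched prefix "día"
  assert(end.utf32_index() == 3);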
diff --git a/icing/transform/map/map-normalizer.h b/icing/transform/map/map-normalizer.h
index f9c0e42..ed996ae 100644
--- a/icing/transform/map/map-normalizer.h
+++ b/icing/transform/map/map-normalizer.h
@@ -19,6 +19,7 @@
#include <string_view>
#include "icing/transform/normalizer.h"
+#include "icing/util/character-iterator.h"
namespace icing {
namespace lib {
@@ -39,6 +40,17 @@ class MapNormalizer : public Normalizer {
// Read more mapping details in normalization-map.cc
std::string NormalizeTerm(std::string_view term) const override;
+ // Returns a CharacterIterator pointing to one past the end of the segment of
+ // term that (once normalized) matches with normalized_term.
+ //
+ // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return
+ // CharacterIterator(u8:4, u16:4, u32:4).
+ //
+ // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return
+ // CharacterIterator(u8:0, u16:0, u32:0).
+ CharacterIterator FindNormalizedMatchEndPosition(
+ std::string_view term, std::string_view normalized_term) const override;
+
private:
// The maximum term length allowed after normalization.
int max_term_byte_size_;
diff --git a/icing/transform/map/map-normalizer_benchmark.cc b/icing/transform/map/map-normalizer_benchmark.cc
index 691afc6..4560329 100644
--- a/icing/transform/map/map-normalizer_benchmark.cc
+++ b/icing/transform/map/map-normalizer_benchmark.cc
@@ -24,7 +24,7 @@
// //icing/transform/map:map-normalizer_benchmark
//
// $ blaze-bin/icing/transform/map/map-normalizer_benchmark
-// --benchmarks=all
+// --benchmark_filter=all
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
@@ -35,7 +35,7 @@
// blaze-bin/icing/transform/map/map-normalizer_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmarks=all
+// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmark_filter=all
namespace icing {
namespace lib {
@@ -143,6 +143,104 @@ BENCHMARK(BM_NormalizeHiragana)
->Arg(2048000)
->Arg(4096000);
+void BM_UppercaseSubTokenLength(benchmark::State& state) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string(state.range(0), 'A');
+ std::string normalized_input_string(state.range(0), 'a');
+ for (auto _ : state) {
+ normalizer->FindNormalizedMatchEndPosition(input_string,
+ normalized_input_string);
+ }
+}
+BENCHMARK(BM_UppercaseSubTokenLength)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_AccentSubTokenLength(benchmark::State& state) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ std::string normalized_input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("àáâãā");
+ normalized_input_string.append("aaaaa");
+ }
+
+ for (auto _ : state) {
+ normalizer->FindNormalizedMatchEndPosition(input_string,
+ normalized_input_string);
+ }
+}
+BENCHMARK(BM_AccentSubTokenLength)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_HiraganaSubTokenLength(benchmark::State& state) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ std::string normalized_input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("あいうえお");
+ normalized_input_string.append("アイウエオ");
+ }
+
+ for (auto _ : state) {
+ normalizer->FindNormalizedMatchEndPosition(input_string,
+ normalized_input_string);
+ }
+}
+BENCHMARK(BM_HiraganaSubTokenLength)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
} // namespace
} // namespace lib
diff --git a/icing/transform/map/map-normalizer_test.cc b/icing/transform/map/map-normalizer_test.cc
index b62ae0e..adc5623 100644
--- a/icing/transform/map/map-normalizer_test.cc
+++ b/icing/transform/map/map-normalizer_test.cc
@@ -23,6 +23,7 @@
#include "icing/testing/icu-i18n-test-utils.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/character-iterator.h"
namespace icing {
namespace lib {
@@ -199,6 +200,104 @@ TEST(MapNormalizerTest, Truncate) {
}
}
+TEST(MapNormalizerTest, PrefixMatchLength) {
+ // Verify that FindNormalizedMatchEndPosition will properly find the length of
+ // the prefix match when given a non-normalized term and a normalized term
+ // that is a prefix of the non-normalized one.
+ ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ // Upper to lower
+ std::string term = "MDI";
+ CharacterIterator match_end =
+ normalizer->FindNormalizedMatchEndPosition(term, "md");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("MD"));
+
+ term = "Icing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "icin");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Icin"));
+
+ // Full-width
+ term = "525600";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "525");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525"));
+
+ term = "FULLWIDTH";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "full");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL"));
+
+ // Hiragana to Katakana
+ term = "あいうえお";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい"));
+
+ term = "かきくけこ";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "カ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か"));
+
+ // Latin accents
+ term = "Zürich";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "zur");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür"));
+
+ term = "après-midi";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "apre");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè"));
+
+ term = "Buenos días";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí"));
+}
+
+TEST(MapNormalizerTest, SharedPrefixMatchLength) {
+ // Verify that FindNormalizedMatchEndPosition will properly find the length of
+ // the prefix match when given a non-normalized term and a normalized term
+ // that share a common prefix.
+ ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ // Upper to lower
+ std::string term = "MDI";
+ CharacterIterator match_end =
+ normalizer->FindNormalizedMatchEndPosition(term, "mgm");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("M"));
+
+ term = "Icing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "icky");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Ic"));
+
+ // Full-width
+ term = "525600";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "525788");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("525"));
+
+ term = "FULLWIDTH";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "fully");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("FULL"));
+
+ // Hiragana to Katakana
+ term = "あいうえお";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "アイエオ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("あい"));
+
+ term = "かきくけこ";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "カケコ");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("か"));
+
+ // Latin accents
+ term = "Zürich";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "zurg");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Zür"));
+
+ term = "après-midi";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "apreciate");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("aprè"));
+
+ term = "días";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día"));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/transform/map/normalization-map.cc b/icing/transform/map/normalization-map.cc
index c318036..0994ab8 100644
--- a/icing/transform/map/normalization-map.cc
+++ b/icing/transform/map/normalization-map.cc
@@ -691,19 +691,21 @@ constexpr NormalizationPair kNormalizationMappings[] = {
} // namespace
-const std::unordered_map<char16_t, char16_t>& GetNormalizationMap() {
+const std::unordered_map<char16_t, char16_t> *GetNormalizationMap() {
// The map is allocated dynamically the first time this function is executed.
- static const std::unordered_map<char16_t, char16_t> normalization_map = [] {
- std::unordered_map<char16_t, char16_t> map;
- // Size of all the mappings is about 2.5 KiB.
- constexpr int numMappings =
- sizeof(kNormalizationMappings) / sizeof(NormalizationPair);
- map.reserve(numMappings);
- for (size_t i = 0; i < numMappings; ++i) {
- map.emplace(kNormalizationMappings[i].from, kNormalizationMappings[i].to);
- }
- return map;
- }();
+ static const std::unordered_map<char16_t, char16_t> *const normalization_map =
+ [] {
+ auto *map = new std::unordered_map<char16_t, char16_t>();
+ // Size of all the mappings is about 2.5 KiB.
+ constexpr int numMappings =
+ sizeof(kNormalizationMappings) / sizeof(NormalizationPair);
+ map->reserve(numMappings);
+ for (size_t i = 0; i < numMappings; ++i) {
+ map->emplace(kNormalizationMappings[i].from,
+ kNormalizationMappings[i].to);
+ }
+ return map;
+ }();
return normalization_map;
}
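The rewritten accessor is the leak-on-purpose flavor of a function-local
static: the map is built on first use and intentionally never destroyed,
which sidesteps static-destruction-order hazards at process exit and gives
callers a pointer they can null-check. The core of the pattern, reduced to a
sketch (the two entries are hypothetical placeholders, not taken from
kNormalizationMappings):

  const std::unordered_map<char16_t, char16_t>* LazySingletonMap() {
    static const auto* const map =
        new std::unordered_map<char16_t, char16_t>{{u'À', u'A'},
                                                   {u'Á', u'A'}};
    return map;  // initialized exactly once; intentionally never deleted
  }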
diff --git a/icing/transform/map/normalization-map.h b/icing/transform/map/normalization-map.h
index aea85bd..ac7872b 100644
--- a/icing/transform/map/normalization-map.h
+++ b/icing/transform/map/normalization-map.h
@@ -23,7 +23,7 @@ namespace lib {
// Returns a map containing normalization mappings. A mapping (A -> B) means
// that we'll transform every character 'A' into 'B'. See normalization-map.cc
// for mapping details.
-const std::unordered_map<char16_t, char16_t>& GetNormalizationMap();
+const std::unordered_map<char16_t, char16_t>* GetNormalizationMap();
} // namespace lib
} // namespace icing
diff --git a/icing/transform/normalizer.h b/icing/transform/normalizer.h
index 4cbfa63..2110f0f 100644
--- a/icing/transform/normalizer.h
+++ b/icing/transform/normalizer.h
@@ -20,6 +20,7 @@
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/util/character-iterator.h"
namespace icing {
namespace lib {
@@ -39,6 +40,17 @@ class Normalizer {
// Normalizes the input term based on rules. See implementation classes for
// specific transformation rules.
virtual std::string NormalizeTerm(std::string_view term) const = 0;
+
+ // Returns a CharacterIterator pointing to one past the end of the segment of
+ // term that (once normalized) matches with normalized_term.
+ //
+ // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return
+ // CharacterIterator(u8:4, u16:4, u32:4).
+ //
+ // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return
+ // CharacterIterator(u8:0, u16:0, u32:0).
+ virtual CharacterIterator FindNormalizedMatchEndPosition(
+ std::string_view term, std::string_view normalized_term) const = 0;
};
} // namespace lib
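A sketch of how a snippeting caller might use the new virtual to map a
normalized query term back onto the raw stored term; it mirrors the tests in
this patch, with error handling elided:

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<Normalizer> normalizer,
      normalizer_factory::Create(/*max_term_byte_size=*/1000));
  std::string_view raw_term = "Zürich";
  CharacterIterator end =
      normalizer->FindNormalizedMatchEndPosition(raw_term, "zur");
  // 'ü' takes two UTF-8 bytes, so utf8_index() is 4 and the window to
  // highlight is raw_term.substr(0, 4), i.e. "Zür".
  std::string_view window = raw_term.substr(0, end.utf8_index());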
diff --git a/icing/transform/simple/none-normalizer-factory.cc b/icing/transform/simple/none-normalizer-factory.cc
deleted file mode 100644
index 6b35270..0000000
--- a/icing/transform/simple/none-normalizer-factory.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_
-#define ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_
-
-#include <memory>
-#include <string_view>
-
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/transform/normalizer.h"
-#include "icing/transform/simple/none-normalizer.h"
-
-namespace icing {
-namespace lib {
-
-namespace normalizer_factory {
-
-// Creates a dummy normalizer. The term is not normalized, but
-// the text will be truncated to max_term_byte_size if it exceeds the max size.
-//
-// Returns:
-// A normalizer on success
-// INVALID_ARGUMENT if max_term_byte_size <= 0
-// INTERNAL_ERROR on errors
-libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create(
- int max_term_byte_size) {
- if (max_term_byte_size <= 0) {
- return absl_ports::InvalidArgumentError(
- "max_term_byte_size must be greater than zero.");
- }
-
- return std::make_unique<NoneNormalizer>(max_term_byte_size);
-}
-
-} // namespace normalizer_factory
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_FACTORY_H_
diff --git a/icing/transform/simple/none-normalizer.h b/icing/transform/simple/none-normalizer.h
deleted file mode 100644
index 47085e1..0000000
--- a/icing/transform/simple/none-normalizer.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_
-#define ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_
-
-#include <string>
-#include <string_view>
-
-#include "icing/transform/normalizer.h"
-
-namespace icing {
-namespace lib {
-
-// This normalizer is not meant for production use. Currently only used to get
-// the Icing library to compile in Jetpack.
-//
-// No normalization is done, but the term is truncated if it exceeds
-// max_term_byte_size.
-class NoneNormalizer : public Normalizer {
- public:
- explicit NoneNormalizer(int max_term_byte_size)
- : max_term_byte_size_(max_term_byte_size){};
-
- std::string NormalizeTerm(std::string_view term) const override {
- if (term.length() > max_term_byte_size_) {
- return std::string(term.substr(0, max_term_byte_size_));
- }
- return std::string(term);
- }
-
- private:
- // The maximum term length allowed after normalization.
- int max_term_byte_size_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_TRANSFORM_SIMPLE_NONE_NORMALIZER_H_
diff --git a/icing/transform/simple/none-normalizer_test.cc b/icing/transform/simple/none-normalizer_test.cc
deleted file mode 100644
index e074828..0000000
--- a/icing/transform/simple/none-normalizer_test.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <memory>
-
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "icing/testing/common-matchers.h"
-#include "icing/transform/normalizer-factory.h"
-#include "icing/transform/normalizer.h"
-
-namespace icing {
-namespace lib {
-namespace {
-
-using ::testing::Eq;
-
-TEST(NoneNormalizerTest, Creation) {
- EXPECT_THAT(normalizer_factory::Create(
- /*max_term_byte_size=*/5),
- IsOk());
- EXPECT_THAT(normalizer_factory::Create(
- /*max_term_byte_size=*/0),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- EXPECT_THAT(normalizer_factory::Create(
- /*max_term_byte_size=*/-1),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST(IcuNormalizerTest, NoNormalizationDone) {
- ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
- /*max_term_byte_size=*/1000));
- EXPECT_THAT(normalizer->NormalizeTerm(""), Eq(""));
- EXPECT_THAT(normalizer->NormalizeTerm("hello world"), Eq("hello world"));
-
- // Capitalization
- EXPECT_THAT(normalizer->NormalizeTerm("MDI"), Eq("MDI"));
-
- // Accents
- EXPECT_THAT(normalizer->NormalizeTerm("Zürich"), Eq("Zürich"));
-
- // Full-width punctuation to ASCII punctuation
- EXPECT_THAT(normalizer->NormalizeTerm("。,!?:”"), Eq("。,!?:”"));
-
- // Half-width katakana
- EXPECT_THAT(normalizer->NormalizeTerm("カ"), Eq("カ"));
-}
-
-TEST(NoneNormalizerTest, Truncate) {
- ICING_ASSERT_OK_AND_ASSIGN(auto normalizer, normalizer_factory::Create(
- /*max_term_byte_size=*/5));
-
- // Won't be truncated
- EXPECT_THAT(normalizer->NormalizeTerm("hi"), Eq("hi"));
- EXPECT_THAT(normalizer->NormalizeTerm("hello"), Eq("hello"));
-
- // Truncated to length 5.
- EXPECT_THAT(normalizer->NormalizeTerm("hello!"), Eq("hello"));
-}
-
-} // namespace
-} // namespace lib
-} // namespace icing
diff --git a/icing/util/bit-util.h b/icing/util/bit-util.h
index e2bb817..7ca20b4 100644
--- a/icing/util/bit-util.h
+++ b/icing/util/bit-util.h
@@ -24,19 +24,18 @@ namespace bit_util {
// Manipulating bit fields.
//
-// x value containing the bit field(s)
-// offset offset of bit field in x
-// len len of bit field in x
+// value value containing the bit field(s)
+// lsb_offset offset of bit field in value, starting from the least significant
+// bit. for example, the '1' in '0100' has a lsb_offset of 2
+// len len of bit field in value
//
// REQUIREMENTS
//
-// - x an unsigned integer <= 64 bits
-// - offset + len <= sizeof(x) * 8
+// - value is an unsigned integer <= 64 bits
+// - lsb_offset + len <= sizeof(value) * 8
//
// There is no error checking so you will get garbage if you don't
// ensure the above.
-//
-// To set a value, use BITFIELD_CLEAR then BITFIELD_OR.
// Shifting by more than the word length is undefined (on ARM it has the
// intended effect, but on Intel it shifts by % word length), so check the
@@ -44,20 +43,65 @@ namespace bit_util {
inline uint64_t BitfieldMask(uint32_t len) {
return ((len == 0) ? 0U : ((~uint64_t{0}) >> (64 - (len))));
}
-inline uint64_t BitfieldGet(uint64_t mask, uint32_t lsb_offset, uint32_t len) {
- return ((mask) >> (lsb_offset)) & BitfieldMask(len);
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint8_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
+}
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint16_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
+}
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint32_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
+}
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint64_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
+}
+
+inline uint64_t BitfieldGet(uint64_t value, uint32_t lsb_offset, uint32_t len) {
+ return ((value) >> (lsb_offset)) & BitfieldMask(len);
+}
+
+inline void BitfieldSet(uint8_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint8_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
+}
+
+inline void BitfieldSet(uint16_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint16_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
}
-inline void BitfieldSet(uint32_t value, uint32_t lsb_offset, uint32_t len,
- uint32_t* mask) {
- // We conservatively mask val at len so x won't be corrupted if val >=
- // 1 << len.
- *mask |= (uint64_t{value} & BitfieldMask(len)) << (lsb_offset);
+
+inline void BitfieldSet(uint32_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint32_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
}
-inline void BitfieldSet(uint64_t value, uint32_t lsb_offset, uint32_t len,
- uint64_t* mask) {
- // We conservatively mask val at len so x won't be corrupted if val >=
- // 1 << len.
- *mask |= (value & BitfieldMask(len)) << (lsb_offset);
+
+inline void BitfieldSet(uint64_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint64_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
}
} // namespace bit_util
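A worked example of the clear-then-set behavior added above (illustrative
only): overwriting a 3-bit field that currently holds 0b111.

  uint32_t flags = 0b01110000;  // 3-bit field at lsb_offset 4 holds 0b111
  bit_util::BitfieldSet(/*new_value=*/0b010u, /*lsb_offset=*/4, /*len=*/3,
                        &flags);
  // BitfieldClear ran first, so no stale bits survive; an OR-only set would
  // have left the field still reading 0b111.
  assert(flags == 0b00100000);
  assert(bit_util::BitfieldGet(flags, /*lsb_offset=*/4, /*len=*/3) == 0b010);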
diff --git a/icing/util/bit-util_test.cc b/icing/util/bit-util_test.cc
new file mode 100644
index 0000000..3b86a21
--- /dev/null
+++ b/icing/util/bit-util_test.cc
@@ -0,0 +1,145 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/bit-util.h"
+
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::Eq;
+
+TEST(BitUtilTest, BitfieldMask) {
+ // Check that we can handle up to uint8_t's
+ EXPECT_THAT(bit_util::BitfieldMask(/*len=*/0), Eq(0b0));
+ EXPECT_THAT(bit_util::BitfieldMask(/*len=*/1), Eq(0b01));
+
+ // Check that we can handle up to uint32_t's
+ EXPECT_THAT(bit_util::BitfieldMask(/*len=*/16), Eq(0b01111111111111111));
+
+ // Check that we can handle up to uint64_t's
+ EXPECT_THAT(
+ bit_util::BitfieldMask(/*len=*/63),
+ Eq(0b0111111111111111111111111111111111111111111111111111111111111111));
+}
+
+TEST(BitUtilTest, BitfieldClear) {
+ // Check that we can handle up to uint8_t's
+ uint8_t value_8 = 0b0;
+ bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0));
+
+ value_8 = 0b01;
+ bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b00));
+
+ value_8 = 0b011;
+ bit_util::BitfieldClear(/*lsb_offset=*/1, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b001));
+
+ value_8 = 0b011;
+ bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b000));
+
+ value_8 = 0b0110;
+ bit_util::BitfieldClear(/*lsb_offset=*/1, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0000));
+
+ // Check that we can handle up to uint32_t's
+ uint32_t value_32 = 0b010000000000000000000000;
+ bit_util::BitfieldClear(/*lsb_offset=*/22, /*len=*/1, &value_32);
+ EXPECT_THAT(value_32, Eq(0b0));
+
+ // Check that we can handle up to uint64_t's
+ uint64_t value_64 = 0b0100000000000000000000000000000000000;
+ bit_util::BitfieldClear(/*lsb_offset=*/35, /*len=*/1, &value_64);
+ EXPECT_THAT(value_64, Eq(0b0));
+}
+
+TEST(BitUtilTest, BitfieldGet) {
+ // Get something in the uint8_t range
+ EXPECT_THAT(bit_util::BitfieldGet(0b0, /*lsb_offset=*/0, /*len=*/1), Eq(0b0));
+ EXPECT_THAT(bit_util::BitfieldGet(0b01, /*lsb_offset=*/0, /*len=*/1),
+ Eq(0b01));
+ EXPECT_THAT(bit_util::BitfieldGet(0b010, /*lsb_offset=*/1, /*len=*/1),
+ Eq(0b01));
+ EXPECT_THAT(bit_util::BitfieldGet(0b001, /*lsb_offset=*/1, /*len=*/1),
+ Eq(0b0));
+ EXPECT_THAT(bit_util::BitfieldGet(0b011, /*lsb_offset=*/0, /*len=*/2),
+ Eq(0b011));
+ EXPECT_THAT(bit_util::BitfieldGet(0b0110, /*lsb_offset=*/1, /*len=*/2),
+ Eq(0b011));
+ EXPECT_THAT(bit_util::BitfieldGet(0b0101, /*lsb_offset=*/0, /*len=*/3),
+ Eq(0b0101));
+
+ // Get something in the uint32_t range
+ EXPECT_THAT(
+ bit_util::BitfieldGet(0b01000000000000, /*lsb_offset=*/12, /*len=*/1),
+ Eq(0b01));
+
+ // Get something in the uint64_t range
+ EXPECT_THAT(bit_util::BitfieldGet(0b010000000000000000000000000000000000,
+ /*lsb_offset=*/34, /*len=*/1),
+ Eq(0b01));
+}
+
+TEST(BitUtilTest, BitfieldSet) {
+ // Set something in the uint8_t range
+ uint8_t value_8 = 0b0;
+ bit_util::BitfieldSet(0b0, /*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0));
+
+ value_8 = 0b01;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b01));
+
+ value_8 = 0b00;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b01));
+
+ value_8 = 0b00;
+ bit_util::BitfieldSet(0b011, /*lsb_offset=*/0, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b011));
+
+ value_8 = 0b01;
+ bit_util::BitfieldSet(0b011, /*lsb_offset=*/0, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b011));
+
+ value_8 = 0b01;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/1, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b011));
+
+ value_8 = 0b0001;
+ bit_util::BitfieldSet(0b011, /*lsb_offset=*/1, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0111));
+
+ // Set something in the uint32_t range
+ uint32_t value_32 = 0b0;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/16, /*len=*/1, &value_32);
+ EXPECT_THAT(value_32, Eq(0b010000000000000000));
+
+ // Set something in the uint64_t range
+ uint64_t value_64 = 0b0;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/34, /*len=*/1, &value_64);
+ EXPECT_THAT(value_64, Eq(0b010000000000000000000000000000000000));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/character-iterator.cc b/icing/util/character-iterator.cc
new file mode 100644
index 0000000..0ab1e50
--- /dev/null
+++ b/icing/util/character-iterator.cc
@@ -0,0 +1,269 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/character-iterator.h"
+
+#include "icing/util/i18n-utils.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Returns the lead byte of the UTF-8 character that includes the byte at
+// current_byte_index within it.
+int GetUTF8StartPosition(std::string_view text, int current_byte_index) {
+ while (!i18n_utils::IsLeadUtf8Byte(text[current_byte_index])) {
+ --current_byte_index;
+ }
+ return current_byte_index;
+}
+
+} // namespace
+
+UChar32 CharacterIterator::GetCurrentChar() {
+ if (cached_current_char_ == i18n_utils::kInvalidUChar32) {
+ // Our indices point to the right character, we just need to read that
+ // character. No need to worry about an error. If GetUChar32At fails, then
+ // current_char will be i18n_utils::kInvalidUChar32.
+ cached_current_char_ =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ }
+ return cached_current_char_;
+}
+
+bool CharacterIterator::MoveToUtf8(int desired_utf8_index) {
+ return (desired_utf8_index > utf8_index_) ? AdvanceToUtf8(desired_utf8_index)
+ : RewindToUtf8(desired_utf8_index);
+}
+
+bool CharacterIterator::AdvanceToUtf8(int desired_utf8_index) {
+ ResetToStartIfNecessary();
+
+ if (desired_utf8_index > text_.length()) {
+ // Enforce the requirement.
+ return false;
+ }
+ // Need to work forwards.
+ UChar32 uchar32 = cached_current_char_;
+ while (utf8_index_ < desired_utf8_index) {
+ uchar32 =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ if (uchar32 == i18n_utils::kInvalidUChar32) {
+ // Unable to retrieve a valid UTF-32 character at the current position.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ int utf8_length = i18n_utils::GetUtf8Length(uchar32);
+ if (utf8_index_ + utf8_length > desired_utf8_index) {
+ // Ah! Don't go too far!
+ break;
+ }
+ utf8_index_ += utf8_length;
+ utf16_index_ += i18n_utils::GetUtf16Length(uchar32);
+ ++utf32_index_;
+ }
+ cached_current_char_ =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ return true;
+}
+
+bool CharacterIterator::RewindToUtf8(int desired_utf8_index) {
+ if (desired_utf8_index < 0) {
+ // Enforce the requirement.
+ return false;
+ }
+ // Need to work backwards.
+ UChar32 uchar32 = cached_current_char_;
+ while (utf8_index_ > desired_utf8_index) {
+ int utf8_index = utf8_index_ - 1;
+ utf8_index = GetUTF8StartPosition(text_, utf8_index);
+ if (utf8_index < 0) {
+ // Somehow, there wasn't a single UTF-8 lead byte at the requested
+ // index or an earlier byte.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ // We've found the start of a unicode char!
+ uchar32 =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index);
+ int expected_length = utf8_index_ - utf8_index;
+ if (uchar32 == i18n_utils::kInvalidUChar32 ||
+ expected_length != i18n_utils::GetUtf8Length(uchar32)) {
+ // Either unable to retrieve a valid UTF-32 character at the previous
+ // position or we skipped past an invalid sequence while seeking the
+ // previous start position.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ cached_current_char_ = uchar32;
+ utf8_index_ = utf8_index;
+ utf16_index_ -= i18n_utils::GetUtf16Length(uchar32);
+ --utf32_index_;
+ }
+ return true;
+}
+
+bool CharacterIterator::MoveToUtf16(int desired_utf16_index) {
+ return (desired_utf16_index > utf16_index_)
+ ? AdvanceToUtf16(desired_utf16_index)
+ : RewindToUtf16(desired_utf16_index);
+}
+
+bool CharacterIterator::AdvanceToUtf16(int desired_utf16_index) {
+ ResetToStartIfNecessary();
+
+ UChar32 uchar32 = cached_current_char_;
+ while (utf16_index_ < desired_utf16_index) {
+ uchar32 =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ if (uchar32 == i18n_utils::kInvalidUChar32) {
+ // Unable to retrieve a valid UTF-32 character at the current position.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ int utf16_length = i18n_utils::GetUtf16Length(uchar32);
+ if (utf16_index_ + utf16_length > desired_utf16_index) {
+ // Ah! Don't go too far!
+ break;
+ }
+ int utf8_length = i18n_utils::GetUtf8Length(uchar32);
+ if (utf8_index_ + utf8_length > text_.length()) {
+ // Enforce the requirement.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ utf8_index_ += utf8_length;
+ utf16_index_ += utf16_length;
+ ++utf32_index_;
+ }
+ cached_current_char_ =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ return true;
+}
+
+bool CharacterIterator::RewindToUtf16(int desired_utf16_index) {
+ if (desired_utf16_index < 0) {
+ return false;
+ }
+ UChar32 uchar32 = cached_current_char_;
+ while (utf16_index_ > desired_utf16_index) {
+ int utf8_index = utf8_index_ - 1;
+ utf8_index = GetUTF8StartPosition(text_, utf8_index);
+ if (utf8_index < 0) {
+ // Somehow, there wasn't a single UTF-8 lead byte at the requested
+ // index or an earlier byte.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ // We've found the start of a unicode char!
+ uchar32 =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index);
+ int expected_length = utf8_index_ - utf8_index;
+ if (uchar32 == i18n_utils::kInvalidUChar32 ||
+ expected_length != i18n_utils::GetUtf8Length(uchar32)) {
+ // Either unable to retrieve a valid UTF-32 character at the previous
+ // position or we skipped past an invalid sequence while seeking the
+ // previous start position.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ cached_current_char_ = uchar32;
+ utf8_index_ = utf8_index;
+ utf16_index_ -= i18n_utils::GetUtf16Length(uchar32);
+ --utf32_index_;
+ }
+ return true;
+}
+
+bool CharacterIterator::MoveToUtf32(int desired_utf32_index) {
+ return (desired_utf32_index > utf32_index_)
+ ? AdvanceToUtf32(desired_utf32_index)
+ : RewindToUtf32(desired_utf32_index);
+}
+
+bool CharacterIterator::AdvanceToUtf32(int desired_utf32_index) {
+ ResetToStartIfNecessary();
+
+ UChar32 uchar32 = cached_current_char_;
+ while (utf32_index_ < desired_utf32_index) {
+ uchar32 =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ if (uchar32 == i18n_utils::kInvalidUChar32) {
+ // Unable to retrieve a valid UTF-32 character at the current position.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ int utf16_length = i18n_utils::GetUtf16Length(uchar32);
+ int utf8_length = i18n_utils::GetUtf8Length(uchar32);
+ if (utf8_index_ + utf8_length > text_.length()) {
+ // Enforce the requirement.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ utf8_index_ += utf8_length;
+ utf16_index_ += utf16_length;
+ ++utf32_index_;
+ }
+ cached_current_char_ =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index_);
+ return true;
+}
+
+bool CharacterIterator::RewindToUtf32(int desired_utf32_index) {
+ if (desired_utf32_index < 0) {
+ return false;
+ }
+ UChar32 uchar32 = cached_current_char_;
+ while (utf32_index_ > desired_utf32_index) {
+ int utf8_index = utf8_index_ - 1;
+ utf8_index = GetUTF8StartPosition(text_, utf8_index);
+ if (utf8_index < 0) {
+ // Somehow, there wasn't a single UTF-8 lead byte at the requested
+ // index or an earlier byte.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ // We've found the start of a unicode char!
+ uchar32 =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), utf8_index);
+ int expected_length = utf8_index_ - utf8_index;
+ if (uchar32 == i18n_utils::kInvalidUChar32 ||
+ expected_length != i18n_utils::GetUtf8Length(uchar32)) {
+ // Either unable to retrieve a valid UTF-32 character at the previous
+ // position or we skipped past an invalid sequence while seeking the
+ // previous start position.
+ cached_current_char_ = i18n_utils::kInvalidUChar32;
+ return false;
+ }
+ cached_current_char_ = uchar32;
+ utf8_index_ = utf8_index;
+ utf16_index_ -= i18n_utils::GetUtf16Length(uchar32);
+ --utf32_index_;
+ }
+ return true;
+}
+
+void CharacterIterator::ResetToStartIfNecessary() {
+ if (utf8_index_ < 0 || utf16_index_ < 0 || utf32_index_ < 0) {
+ utf8_index_ = 0;
+ utf16_index_ = 0;
+ utf32_index_ = 0;
+ cached_current_char_ =
+ i18n_utils::GetUChar32At(text_.data(), text_.length(), 0);
+ }
+}
+
+} // namespace lib
+} // namespace icing
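A hedged example of the rewind path implemented above: stepping back over a
multi-byte character keeps all three indices in lock step, with
GetUTF8StartPosition walking backwards to the lead byte.

  // "¿D": '¿' (U+00BF) encodes as two UTF-8 bytes, 'D' as one.
  CharacterIterator it("¿D", /*utf8_index=*/3, /*utf16_index=*/2,
                       /*utf32_index=*/2);  // positioned one past 'D'
  it.RewindToUtf8(0);
  // The iterator stepped back over 'D' and then over both bytes of '¿';
  // it now reads (u8:0, u16:0, u32:0) and GetCurrentChar() == U'¿'.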
diff --git a/icing/util/character-iterator.h b/icing/util/character-iterator.h
new file mode 100644
index 0000000..893718a
--- /dev/null
+++ b/icing/util/character-iterator.h
@@ -0,0 +1,116 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_CHARACTER_ITERATOR_H_
+#define ICING_UTIL_CHARACTER_ITERATOR_H_
+
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/i18n-utils.h"
+
+namespace icing {
+namespace lib {
+
+class CharacterIterator {
+ public:
+ explicit CharacterIterator(std::string_view text)
+ : CharacterIterator(text, 0, 0, 0) {}
+
+ CharacterIterator(std::string_view text, int utf8_index, int utf16_index,
+ int utf32_index)
+ : text_(text),
+ cached_current_char_(i18n_utils::kInvalidUChar32),
+ utf8_index_(utf8_index),
+ utf16_index_(utf16_index),
+ utf32_index_(utf32_index) {}
+
+ // Returns the character that the iterator currently points to.
+ // i18n_utils::kInvalidUChar32 if unable to read that character.
+ UChar32 GetCurrentChar();
+
+ // Moves current position to desired_utf8_index.
+ // REQUIRES: 0 <= desired_utf8_index <= text_.length()
+ bool MoveToUtf8(int desired_utf8_index);
+
+ // Advances from current position to the character that includes the specified
+ // UTF-8 index.
+ // REQUIRES: desired_utf8_index <= text_.length()
+ // desired_utf8_index is allowed to point one index past the end, but no
+ // further.
+ bool AdvanceToUtf8(int desired_utf8_index);
+
+ // Rewinds from current position to the character that includes the specified
+ // UTF-8 index.
+ // REQUIRES: 0 <= desired_utf8_index
+ bool RewindToUtf8(int desired_utf8_index);
+
+ // Moves current position to desired_utf16_index.
+ // REQUIRES: 0 <= desired_utf16_index <= text_.utf16_length()
+ bool MoveToUtf16(int desired_utf16_index);
+
+ // Advances current position to desired_utf16_index.
+ // REQUIRES: desired_utf16_index <= text_.utf16_length()
+ // desired_utf16_index is allowed to point one index past the end, but no
+ // further.
+ bool AdvanceToUtf16(int desired_utf16_index);
+
+ // Rewinds current position to desired_utf16_index.
+ // REQUIRES: 0 <= desired_utf16_index
+ bool RewindToUtf16(int desired_utf16_index);
+
+ // Moves current position to desired_utf32_index.
+ // REQUIRES: 0 <= desired_utf32_index <= text_.utf32_length()
+ bool MoveToUtf32(int desired_utf32_index);
+
+ // Advances current position to desired_utf32_index.
+ // REQUIRES: desired_utf32_index <= text_.utf32_length()
+ // desired_utf32_index is allowed to point one index past the end, but no
+ // further.
+ bool AdvanceToUtf32(int desired_utf32_index);
+
+ // Rewinds current position to desired_utf32_index.
+ // REQUIRES: 0 <= desired_utf32_index
+ bool RewindToUtf32(int desired_utf32_index);
+
+ int utf8_index() const { return utf8_index_; }
+ int utf16_index() const { return utf16_index_; }
+ int utf32_index() const { return utf32_index_; }
+
+ bool operator==(const CharacterIterator& rhs) const {
+ // cached_current_char_ is just that: a cached value. As such, it's not
+ // considered for equality.
+ return text_ == rhs.text_ && utf8_index_ == rhs.utf8_index_ &&
+ utf16_index_ == rhs.utf16_index_ && utf32_index_ == rhs.utf32_index_;
+ }
+
+ std::string DebugString() const {
+ return IcingStringUtil::StringPrintf("(u8:%d,u16:%d,u32:%d)", utf8_index_,
+ utf16_index_, utf32_index_);
+ }
+
+ private:
+ // Resets the character iterator to the start of the text if any of the
+ // indices are negative.
+ void ResetToStartIfNecessary();
+
+ std::string_view text_;
+ UChar32 cached_current_char_;
+ int utf8_index_;
+ int utf16_index_;
+ int utf32_index_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_CHARACTER_ITERATOR_H_
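Usage sketch for the three-index bookkeeping documented above; the
arithmetic matches the tests that follow:

  CharacterIterator it("año");  // 'ñ' (U+00F1) is two UTF-8 bytes
  it.AdvanceToUtf32(2);         // advance past "añ" by character count
  // All three views of the same position stay consistent:
  // utf8_index() == 3, utf16_index() == 2, utf32_index() == 2, and
  // GetCurrentChar() == 'o'.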
diff --git a/icing/util/character-iterator_test.cc b/icing/util/character-iterator_test.cc
new file mode 100644
index 0000000..195a47b
--- /dev/null
+++ b/icing/util/character-iterator_test.cc
@@ -0,0 +1,266 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/character-iterator.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/icu-i18n-test-utils.h"
+
+namespace icing {
+namespace lib {
+
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+
+TEST(CharacterIteratorTest, BasicUtf8) {
+ constexpr std::string_view kText = "¿Dónde está la biblioteca?";
+ CharacterIterator iterator(kText);
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
+
+ EXPECT_THAT(iterator.AdvanceToUtf8(4), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
+ /*utf32_index=*/2)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf8(18), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
+ /*utf32_index=*/15)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf8(28), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
+ /*utf32_index=*/25)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf8(29), IsTrue());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26,
+ /*utf32_index=*/26)));
+
+ EXPECT_THAT(iterator.RewindToUtf8(28), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
+ /*utf32_index=*/25)));
+
+ EXPECT_THAT(iterator.RewindToUtf8(18), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
+ /*utf32_index=*/15)));
+
+ EXPECT_THAT(iterator.RewindToUtf8(4), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
+ /*utf32_index=*/2)));
+
+ EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
+ /*utf32_index=*/0)));
+}
+
+TEST(CharacterIteratorTest, BasicUtf16) {
+ constexpr std::string_view kText = "¿Dónde está la biblioteca?";
+ CharacterIterator iterator(kText);
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
+
+ EXPECT_THAT(iterator.AdvanceToUtf16(2), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
+ /*utf32_index=*/2)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf16(15), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
+ /*utf32_index=*/15)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf16(25), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
+ /*utf32_index=*/25)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf16(26), IsTrue());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26,
+ /*utf32_index=*/26)));
+
+ EXPECT_THAT(iterator.RewindToUtf16(25), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
+ /*utf32_index=*/25)));
+
+ EXPECT_THAT(iterator.RewindToUtf16(15), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
+ /*utf32_index=*/15)));
+
+ EXPECT_THAT(iterator.RewindToUtf16(2), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
+ /*utf32_index=*/2)));
+
+ EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
+ /*utf32_index=*/0)));
+}
+
+TEST(CharacterIteratorTest, BasicUtf32) {
+ constexpr std::string_view kText = "¿Dónde está la biblioteca?";
+ CharacterIterator iterator(kText);
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
+
+ EXPECT_THAT(iterator.AdvanceToUtf32(2), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
+ /*utf32_index=*/2)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf32(15), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
+ /*utf32_index=*/15)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf32(25), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
+ /*utf32_index=*/25)));
+
+ EXPECT_THAT(iterator.AdvanceToUtf32(26), IsTrue());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26,
+ /*utf32_index=*/26)));
+
+ EXPECT_THAT(iterator.RewindToUtf32(25), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
+ /*utf32_index=*/25)));
+
+ EXPECT_THAT(iterator.RewindToUtf32(15), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
+ /*utf32_index=*/15)));
+
+ EXPECT_THAT(iterator.RewindToUtf32(2), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
+ /*utf32_index=*/2)));
+
+ EXPECT_THAT(iterator.RewindToUtf32(0), IsTrue());
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
+ EXPECT_THAT(iterator,
+ Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
+ /*utf32_index=*/0)));
+}
+
+TEST(CharacterIteratorTest, InvalidUtf) {
+ // "\255" is an invalid sequence.
+ constexpr std::string_view kText = "foo \255 bar";
+ CharacterIterator iterator(kText);
+
+ // Try to advance to the 'b' in 'bar'. This will fail and leave us pointed at
+ // the invalid sequence '\255'. GetCurrentChar() should return an invalid
+ // character.
+ EXPECT_THAT(iterator.AdvanceToUtf8(6), IsFalse());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
+ CharacterIterator exp_iterator(kText, /*utf8_index=*/4, /*utf16_index=*/4,
+ /*utf32_index=*/4);
+ EXPECT_THAT(iterator, Eq(exp_iterator));
+
+ EXPECT_THAT(iterator.AdvanceToUtf16(6), IsFalse());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
+ EXPECT_THAT(iterator, Eq(exp_iterator));
+
+ EXPECT_THAT(iterator.AdvanceToUtf32(6), IsFalse());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
+ EXPECT_THAT(iterator, Eq(exp_iterator));
+
+ // Create the iterator with it pointing at the 'b' in 'bar'.
+ iterator = CharacterIterator(kText, /*utf8_index=*/6, /*utf16_index=*/6,
+ /*utf32_index=*/6);
+ EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
+
+ // Try to advance to the last 'o' in 'foo'. This will fail and leave us
+ // pointed at the ' ' before the invalid sequence '\255'.
+ exp_iterator = CharacterIterator(kText, /*utf8_index=*/5, /*utf16_index=*/5,
+ /*utf32_index=*/5);
+ EXPECT_THAT(iterator.RewindToUtf8(2), IsFalse());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(' '));
+ EXPECT_THAT(iterator, Eq(exp_iterator));
+
+ EXPECT_THAT(iterator.RewindToUtf16(2), IsFalse());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(' '));
+ EXPECT_THAT(iterator, Eq(exp_iterator));
+
+ EXPECT_THAT(iterator.RewindToUtf32(2), IsFalse());
+ EXPECT_THAT(iterator.GetCurrentChar(), Eq(' '));
+ EXPECT_THAT(iterator, Eq(exp_iterator));
+}
+
+TEST(CharacterIteratorTest, MoveToUtfNegativeIndex) {
+ constexpr std::string_view kText = "¿Dónde está la biblioteca?";
+
+ CharacterIterator iterator_utf8(kText, /*utf8_index=*/-1, /*utf16_index=*/0,
+ /*utf32_index=*/0);
+ // We should be able to successfully move when the index is negative.
+ EXPECT_THAT(iterator_utf8.MoveToUtf8(0), IsTrue());
+ // The character cache should be reset and contain the first character when
+ // resetting to index 0.
+ EXPECT_THAT(UCharToString(iterator_utf8.GetCurrentChar()), Eq("¿"));
+ EXPECT_THAT(iterator_utf8.utf8_index(), Eq(0));
+ EXPECT_THAT(iterator_utf8.utf16_index(), Eq(0));
+ EXPECT_THAT(iterator_utf8.utf32_index(), Eq(0));
+
+ CharacterIterator iterator_utf16(kText, /*utf8_index=*/0, /*utf16_index=*/-1,
+ /*utf32_index=*/0);
+ EXPECT_THAT(iterator_utf16.MoveToUtf16(1), IsTrue());
+ EXPECT_THAT(iterator_utf16.GetCurrentChar(), Eq('D'));
+ EXPECT_THAT(iterator_utf16.utf8_index(), Eq(2));
+ EXPECT_THAT(iterator_utf16.utf16_index(), Eq(1));
+ EXPECT_THAT(iterator_utf16.utf32_index(), Eq(1));
+
+ CharacterIterator iterator_utf32(kText, /*utf8_index=*/0, /*utf16_index=*/0,
+ /*utf32_index=*/-1);
+ EXPECT_THAT(iterator_utf32.MoveToUtf32(2), IsTrue());
+ EXPECT_THAT(UCharToString(iterator_utf32.GetCurrentChar()), Eq("ó"));
+ EXPECT_THAT(iterator_utf32.utf8_index(), Eq(3));
+ EXPECT_THAT(iterator_utf32.utf16_index(), Eq(2));
+ EXPECT_THAT(iterator_utf32.utf32_index(), Eq(2));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/clock.cc b/icing/util/clock.cc
index 3593f13..270b5f0 100644
--- a/icing/util/clock.cc
+++ b/icing/util/clock.cc
@@ -16,20 +16,31 @@
#include <chrono> // NOLINT. Abseil library is not available in AOSP so we have
// to use chrono to get current time in milliseconds.
+#include <memory>
namespace icing {
namespace lib {
+int64_t GetSteadyTimeNanoseconds() {
+ return std::chrono::duration_cast<std::chrono::nanoseconds>(
+ std::chrono::steady_clock::now().time_since_epoch())
+ .count();
+}
+
+int64_t GetSteadyTimeMilliseconds() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now().time_since_epoch())
+ .count();
+}
+
int64_t Clock::GetSystemTimeMilliseconds() const {
return std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();
}
-uint64_t GetSteadyTimeNanoseconds() {
- return std::chrono::duration_cast<std::chrono::nanoseconds>(
- std::chrono::steady_clock::now().time_since_epoch())
- .count();
+std::unique_ptr<Timer> Clock::GetNewTimer() const {
+ return std::make_unique<Timer>();
}
} // namespace lib
diff --git a/icing/util/clock.h b/icing/util/clock.h
index 58628f3..d987a4c 100644
--- a/icing/util/clock.h
+++ b/icing/util/clock.h
@@ -16,10 +16,45 @@
#define ICING_UTIL_CLOCK_H_
#include <cstdint>
+#include <functional>
+#include <memory>
namespace icing {
namespace lib {
+// Returns the current steady time in nanoseconds. The steady clock is different
+// from the system clock. It's monotonic and never returns a lower value than a
+// previous call, while a system clock can be occasionally adjusted.
+int64_t GetSteadyTimeNanoseconds();
+
+// Returns the current steady time in milliseconds. The steady clock is
+// different from the system clock. It's monotonic and never returns a lower
+// value than a previous call, while a system clock can be occasionally
+// adjusted.
+int64_t GetSteadyTimeMilliseconds();
+
+// Used to calculate the elapsed time.
+class Timer {
+ public:
+ // Creates and starts the timer.
+ Timer() : start_timestamp_nanoseconds_(GetSteadyTimeNanoseconds()) {}
+
+ virtual ~Timer() = default;
+
+ // Returns the elapsed time in milliseconds since the timer started.
+ virtual int64_t GetElapsedMilliseconds() const {
+ return GetElapsedNanoseconds() / 1000000;
+ }
+
+ // Returns the elapsed time in nanoseconds since the timer started.
+ virtual int64_t GetElapsedNanoseconds() const {
+ return GetSteadyTimeNanoseconds() - start_timestamp_nanoseconds_;
+ }
+
+ private:
+ int64_t start_timestamp_nanoseconds_;
+};
+
// Wrapper around real-time clock functions. This is separated primarily so
// tests can override this clock and inject it into the class under test.
class Clock {
@@ -29,12 +64,39 @@ class Clock {
// Returns the current time in milliseconds; it's guaranteed that the return
// value is non-negative.
virtual int64_t GetSystemTimeMilliseconds() const;
+
+ // Returns a timer used to calculate the elapsed time. The timer starts when
+ // the method returns.
+ virtual std::unique_ptr<Timer> GetNewTimer() const;
};
-// Returns the current steady time in nanoseconds. The steady clock is different
-// from the system clock. It's monotonic and never returns a lower value than a
-// previous call, while a system clock can be occasionally adjusted.
-uint64_t GetSteadyTimeNanoseconds();
+// A convenient RAII timer class that receives a callback. Upon destruction, the
+// callback will be called with the elapsed milliseconds or nanoseconds passed
+// as a parameter, depending on which Unit was passed in the constructor.
+class ScopedTimer {
+ public:
+ enum class Unit { kMillisecond, kNanosecond };
+
+ ScopedTimer(std::unique_ptr<Timer> timer,
+ std::function<void(int64_t)> callback,
+ Unit unit = Unit::kMillisecond)
+ : timer_(std::move(timer)), callback_(std::move(callback)), unit_(unit) {}
+
+ ~ScopedTimer() {
+ if (unit_ == Unit::kMillisecond) {
+ callback_(timer_->GetElapsedMilliseconds());
+ } else {
+ callback_(timer_->GetElapsedNanoseconds());
+ }
+ }
+
+ const Timer& timer() const { return *timer_; }
+
+ private:
+ std::unique_ptr<Timer> timer_;
+ std::function<void(int64_t)> callback_;
+ Unit unit_;
+};
} // namespace lib
} // namespace icing
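For reference, a minimal sketch of how the new Timer and ScopedTimer pieces compose (the latency_ms variable and the surrounding scope are illustrative, not part of this change):

    Clock clock;
    int64_t latency_ms = 0;
    {
      // The timer starts when GetNewTimer() returns; the callback fires when
      // scoped_timer is destroyed at the end of this block.
      ScopedTimer scoped_timer(
          clock.GetNewTimer(),
          [&latency_ms](int64_t elapsed_ms) { latency_ms = elapsed_ms; });
      // ... work being measured ...
    }  // latency_ms now holds the elapsed milliseconds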
diff --git a/icing/util/crc32.h b/icing/util/crc32.h
index e8c7c8f..207a80a 100644
--- a/icing/util/crc32.h
+++ b/icing/util/crc32.h
@@ -28,10 +28,6 @@ namespace lib {
// implementation.
//
// See https://www.zlib.net/manual.html#Checksum for more details.
-//
-// TODO (samzheng): investigate/benchmark swapping zlib crc32 with
-// util/hash/crc32c.h. Regarding util/hash/crc32c.h, CRC32C::Extend crashes as
-// described in b/145837799.
class Crc32 {
public:
// Default to the checksum of an empty string, that is "0".
@@ -39,6 +35,8 @@ class Crc32 {
explicit Crc32(uint32_t init_crc) : crc_(init_crc) {}
+ explicit Crc32(std::string_view str) : crc_(0) { Append(str); }
+
inline bool operator==(const Crc32& other) const {
return crc_ == other.Get();
}
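The new string_view constructor is shorthand for appending to an empty checksum; a minimal usage sketch (assuming the Append() and Get() members already on the class):

    Crc32 crc("foo bar");           // same as: Crc32 crc; crc.Append("foo bar");
    uint32_t checksum = crc.Get();  // checksum of the whole string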
diff --git a/icing/util/data-loss.h b/icing/util/data-loss.h
new file mode 100644
index 0000000..cb19ce2
--- /dev/null
+++ b/icing/util/data-loss.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_DATA_LOSS_H_
+#define ICING_UTIL_DATA_LOSS_H_
+
+namespace icing {
+namespace lib {
+
+enum DataLoss {
+ // No data loss happened. Everything initialized correctly.
+ NONE,
+
+ // Any changes made after the last persist-to-disk call were lost. This includes
+ // adding new data, removing old data, and modifying existing data.
+ PARTIAL,
+
+ // All data is lost. IcingSearchEngine has completely reset.
+ COMPLETE
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_DATA_LOSS_H_
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index 36b84f8..e0880ea 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -19,6 +19,8 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
#include "icing/schema/schema-util.h"
#include "icing/util/status-macros.h"
@@ -32,12 +34,13 @@ DocumentValidator::DocumentValidator(const SchemaStore* schema_store)
: schema_store_(schema_store) {}
libtextclassifier3::Status DocumentValidator::Validate(
- const DocumentProto& document) {
+ const DocumentProto& document, int depth) {
if (document.namespace_().empty()) {
return absl_ports::InvalidArgumentError("Field 'namespace' is empty.");
}
- if (document.uri().empty()) {
+ // Only require a non-empty uri on top-level documents.
+ if (depth == 0 && document.uri().empty()) {
return absl_ports::InvalidArgumentError("Field 'uri' is empty.");
}
@@ -96,12 +99,12 @@ libtextclassifier3::Status DocumentValidator::Validate(
if (property_iter == parsed_property_configs.property_config_map.end()) {
return absl_ports::NotFoundError(absl_ports::StrCat(
"Property config '", property.name(), "' not found for key: (",
- document.namespace_(), ", ", document.uri(), ")."));
+ document.namespace_(), ", ", document.uri(),
+ ") of type: ", document.schema(), "."));
}
const PropertyConfigProto& property_config = *property_iter->second;
// Get the property value size according to data type.
- // TODO (samzheng): make sure values of other data types are empty.
int value_size = 0;
if (property_config.data_type() == PropertyConfigProto::DataType::STRING) {
value_size = property.string_values_size();
@@ -148,24 +151,28 @@ libtextclassifier3::Status DocumentValidator::Validate(
// fail, we don't need to validate the extra documents.
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
- const std::string_view nested_type_expected =
- property_config.schema_type();
+ ICING_ASSIGN_OR_RETURN(
+ const std::unordered_set<SchemaTypeId>* nested_type_ids_expected,
+ schema_store_->GetSchemaTypeIdsWithChildren(
+ property_config.schema_type()));
for (const DocumentProto& nested_document : property.document_values()) {
- if (nested_type_expected.compare(nested_document.schema()) != 0) {
+ libtextclassifier3::StatusOr<SchemaTypeId> nested_document_type_id_or =
+ schema_store_->GetSchemaTypeId(nested_document.schema());
+ if (!nested_document_type_id_or.ok() ||
+ nested_type_ids_expected->count(
+ nested_document_type_id_or.ValueOrDie()) == 0) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "Property '", property.name(), "' should have type '",
- nested_type_expected,
- "' but actual "
- "value has type '",
+ "Property '", property.name(), "' should be type or subtype of '",
+ property_config.schema_type(), "' but actual value has type '",
nested_document.schema(), "' for key: (", document.namespace_(),
", ", document.uri(), ")."));
}
- ICING_RETURN_IF_ERROR(Validate(nested_document));
+ ICING_RETURN_IF_ERROR(Validate(nested_document, depth + 1));
}
}
}
if (num_required_properties_actual <
- parsed_property_configs.num_required_properties) {
+ parsed_property_configs.required_properties.size()) {
return absl_ports::InvalidArgumentError(
absl_ports::StrCat("One or more required fields missing for key: (",
document.namespace_(), ", ", document.uri(), ")."));
diff --git a/icing/util/document-validator.h b/icing/util/document-validator.h
index 34a3217..28dd940 100644
--- a/icing/util/document-validator.h
+++ b/icing/util/document-validator.h
@@ -17,7 +17,6 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/proto/document.pb.h"
-#include "icing/proto/schema.pb.h"
#include "icing/schema/schema-store.h"
namespace icing {
@@ -32,7 +31,8 @@ class DocumentValidator {
// This function validates:
// 1. DocumentProto.namespace is not empty
- // 2. DocumentProto.uri is not empty
+ // 2. DocumentProto.uri is not empty in top-level documents. Nested documents
+ // may have empty uris.
// 3. DocumentProto.schema is not empty
// 4. DocumentProto.schema matches one of SchemaTypeConfigProto.schema_type
// in the given SchemaProto in constructor
@@ -56,7 +56,8 @@ class DocumentValidator {
// In addition, all nested DocumentProto will also be validated towards the
// requirements above.
//
- // DocumentProto.custom_properties are not validated.
+ // 'depth' indicates the nesting level of the document. A top-level
+ // document has a nesting depth of 0.
//
// Returns:
// OK on success
@@ -65,7 +66,8 @@ class DocumentValidator {
// NOT_FOUND if case 4 or 7 fails
// ALREADY_EXISTS if case 6 fails
// INTERNAL on any I/O error
- libtextclassifier3::Status Validate(const DocumentProto& document);
+ libtextclassifier3::Status Validate(const DocumentProto& document,
+ int depth = 0);
void UpdateSchemaStore(const SchemaStore* schema_store) {
schema_store_ = schema_store;
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index 16bdf78..9d10b36 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -20,24 +20,31 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
+
using ::testing::HasSubstr;
-// type and property names of EmailMessage
+// type and property names of EmailMessage and EmailMessageWithNote
constexpr char kTypeEmail[] = "EmailMessage";
+constexpr char kTypeEmailWithNote[] = "EmailMessageWithNote";
constexpr char kPropertySubject[] = "subject";
constexpr char kPropertyText[] = "text";
constexpr char kPropertyRecipients[] = "recipients";
+constexpr char kPropertyNote[] = "note";
// type and property names of Conversation
constexpr char kTypeConversation[] = "Conversation";
+constexpr char kTypeConversationWithEmailNote[] = "ConversationWithEmailNote";
constexpr char kPropertyName[] = "name";
constexpr char kPropertyEmails[] = "emails";
// Other values
@@ -49,41 +56,86 @@ class DocumentValidatorTest : public ::testing::Test {
DocumentValidatorTest() {}
void SetUp() override {
- SchemaProto schema;
- auto type_config = schema.add_types();
- CreateEmailTypeConfig(type_config);
-
- type_config = schema.add_types();
- CreateConversationTypeConfig(type_config);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeEmailWithNote)
+ .AddParentType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNote)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeConversationWithEmailNote)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmailWithNote,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ schema_dir_ = GetTestTempDir() + "/schema_store";
+ ASSERT_TRUE(filesystem_.CreateDirectory(schema_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
document_validator_ =
std::make_unique<DocumentValidator>(schema_store_.get());
}
- static void CreateEmailTypeConfig(SchemaTypeConfigProto* type_config) {
- type_config->set_schema_type(kTypeEmail);
-
- auto subject = type_config->add_properties();
- subject->set_property_name(kPropertySubject);
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- auto text = type_config->add_properties();
- text->set_property_name(kPropertyText);
- text->set_data_type(PropertyConfigProto::DataType::STRING);
- text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- auto recipients = type_config->add_properties();
- recipients->set_property_name(kPropertyRecipients);
- recipients->set_data_type(PropertyConfigProto::DataType::STRING);
- recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- }
-
- static DocumentBuilder SimpleEmailBuilder() {
+ DocumentBuilder SimpleEmailBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "email/1")
.SetSchema(kTypeEmail)
@@ -93,22 +145,18 @@ class DocumentValidatorTest : public ::testing::Test {
kDefaultString);
}
- static void CreateConversationTypeConfig(SchemaTypeConfigProto* type_config) {
- type_config->set_schema_type(kTypeConversation);
-
- auto name = type_config->add_properties();
- name->set_property_name(kPropertyName);
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- auto emails = type_config->add_properties();
- emails->set_property_name(kPropertyEmails);
- emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- emails->set_schema_type(kTypeEmail);
+ DocumentBuilder SimpleEmailWithNoteBuilder() {
+ return DocumentBuilder()
+ .SetKey(kDefaultNamespace, "email_with_note/1")
+ .SetSchema(kTypeEmailWithNote)
+ .AddStringProperty(kPropertySubject, kDefaultString)
+ .AddStringProperty(kPropertyText, kDefaultString)
+ .AddStringProperty(kPropertyRecipients, kDefaultString, kDefaultString,
+ kDefaultString)
+ .AddStringProperty(kPropertyNote, kDefaultString);
}
- static DocumentBuilder SimpleConversationBuilder() {
+ DocumentBuilder SimpleConversationBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "conversation/1")
.SetSchema(kTypeConversation)
@@ -118,9 +166,11 @@ class DocumentValidatorTest : public ::testing::Test {
SimpleEmailBuilder().Build());
}
- std::unique_ptr<DocumentValidator> document_validator_;
- std::unique_ptr<SchemaStore> schema_store_;
+ std::string schema_dir_;
Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentValidator> document_validator_;
};
TEST_F(DocumentValidatorTest, ValidateSimpleSchemasOk) {
@@ -138,13 +188,27 @@ TEST_F(DocumentValidatorTest, ValidateEmptyNamespaceInvalid) {
HasSubstr("'namespace' is empty")));
}
-TEST_F(DocumentValidatorTest, ValidateEmptyUriInvalid) {
+TEST_F(DocumentValidatorTest, ValidateTopLevelEmptyUriInvalid) {
DocumentProto email = SimpleEmailBuilder().SetUri("").Build();
EXPECT_THAT(document_validator_->Validate(email),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("'uri' is empty")));
}
+TEST_F(DocumentValidatorTest, ValidateNestedEmptyUriValid) {
+ DocumentProto conversation =
+ SimpleConversationBuilder()
+ .ClearProperties()
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailBuilder()
+ .SetUri("") // Empty nested uri
+ .Build())
+ .Build();
+
+ EXPECT_THAT(document_validator_->Validate(conversation), IsOk());
+}
+
TEST_F(DocumentValidatorTest, ValidateEmptySchemaInvalid) {
DocumentProto email = SimpleEmailBuilder().SetSchema("").Build();
EXPECT_THAT(document_validator_->Validate(email),
@@ -192,18 +256,6 @@ TEST_F(DocumentValidatorTest, ValidateNonexistentPropertyNotFound) {
HasSubstr("'WrongPropertyName' not found")));
}
-TEST_F(DocumentValidatorTest, ValidateAllCustomPropertyOk) {
- DocumentProto email =
- SimpleEmailBuilder()
- // A nonexistent property, would've triggered a NotFound message
- .AddCustomStringProperty("WrongPropertyName", kDefaultString)
- // 'subject' property should've been a string according to the schema
- .AddCustomBooleanProperty(kPropertySubject, false, true)
- .Build();
-
- EXPECT_THAT(document_validator_->Validate(email), IsOk());
-}
-
TEST_F(DocumentValidatorTest, ValidateExactlyOneRequiredValueOk) {
// Required property should have exactly 1 value
DocumentProto email =
@@ -297,10 +349,82 @@ TEST_F(DocumentValidatorTest,
SimpleEmailBuilder().Build())
.Build();
- EXPECT_THAT(document_validator_->Validate(conversation),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("'emails' should have type 'EmailMessage' but "
- "actual value has type 'Conversation'")));
+ EXPECT_THAT(
+ document_validator_->Validate(conversation),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'emails' should be type or subtype of 'EmailMessage' "
+ "but actual value has type 'Conversation'")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSubtypeOk) {
+ DocumentProto conversation =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails, SimpleEmailBuilder().Build(),
+ // This is a subtype, which is ok.
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailBuilder().Build())
+ .Build();
+
+ EXPECT_THAT(document_validator_->Validate(conversation), IsOk());
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyNonexistentTypeInvalid) {
+ DocumentProto conversation =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(
+ kPropertyEmails, SimpleEmailBuilder().Build(),
+ // Nonexistent type is not allowed
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "email_with_note/1")
+ .SetSchema("Nonexistent")
+ .Build(),
+ SimpleEmailBuilder().Build())
+ .Build();
+
+ EXPECT_THAT(
+ document_validator_->Validate(conversation),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'emails' should be type or subtype of 'EmailMessage' "
+ "but actual value has type 'Nonexistent'")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSuperTypeInvalid) {
+ DocumentProto conversation1 =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation_with_email_note/1")
+ .SetSchema(kTypeConversationWithEmailNote)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build())
+ .Build();
+ EXPECT_THAT(document_validator_->Validate(conversation1), IsOk());
+
+ DocumentProto conversation2 =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation_with_email_note/2")
+ .SetSchema(kTypeConversationWithEmailNote)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailWithNoteBuilder().Build(),
+ // This is a super type, which is not ok.
+ SimpleEmailBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build())
+ .Build();
+ EXPECT_THAT(
+ document_validator_->Validate(conversation2),
+ StatusIs(
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "'emails' should be type or subtype of 'EmailMessageWithNote' "
+ "but actual value has type 'EmailMessage'")));
}
TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) {
@@ -321,12 +445,26 @@ TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) {
}
TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
- SchemaProto email_schema;
- auto type_config = email_schema.add_types();
- CreateEmailTypeConfig(type_config);
+ SchemaProto email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
- // Create a custom directory so we don't collide with the test's preset schema
- // in SetUp
+ // Create a custom directory so we don't collide
+ // with the test's preset schema in SetUp
const std::string custom_schema_dir = GetTestTempDir() + "/custom_schema";
filesystem_.DeleteDirectoryRecursively(custom_schema_dir.c_str());
filesystem_.CreateDirectoryRecursively(custom_schema_dir.c_str());
@@ -334,8 +472,11 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
// Set a schema with only the 'Email' type
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, custom_schema_dir));
- ASSERT_THAT(schema_store->SetSchema(email_schema), IsOk());
+ SchemaStore::Create(&filesystem_, custom_schema_dir, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(
+ email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentValidator document_validator(schema_store.get());
@@ -347,13 +488,29 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
HasSubstr("'Conversation' not found")));
// Add the 'Conversation' type
- SchemaProto email_and_conversation_schema = email_schema;
- type_config = email_and_conversation_schema.add_types();
- CreateConversationTypeConfig(type_config);
+ SchemaProto email_and_conversation_schema =
+ SchemaBuilder(email_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// DocumentValidator should be able to handle the SchemaStore getting updated
// separately
- ASSERT_THAT(schema_store->SetSchema(email_and_conversation_schema), IsOk());
+ ASSERT_THAT(
+ schema_store->SetSchema(email_and_conversation_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_EXPECT_OK(document_validator.Validate(conversation));
}
diff --git a/icing/util/encode-util.cc b/icing/util/encode-util.cc
new file mode 100644
index 0000000..2642da7
--- /dev/null
+++ b/icing/util/encode-util.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/encode-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace encode_util {
+
+std::string EncodeIntToCString(uint64_t value) {
+ std::string encoded_str;
+ // Encode it in base128 and add 1 to make sure that there is no 0-byte. This
+ // increases the size of the encoded_str from 8 bytes to 10 bytes at worst.
+ do {
+ encoded_str.push_back((value & 0x7F) + 1);
+ value >>= 7;
+ } while (value);
+ return encoded_str;
+}
+
+uint64_t DecodeIntFromCString(std::string_view encoded_str) {
+ uint64_t value = 0;
+ for (int i = encoded_str.length() - 1; i >= 0; --i) {
+ value <<= 7;
+ char c = encoded_str[i] - 1;
+ value |= (c & 0x7F);
+ }
+ return value;
+}
+
+} // namespace encode_util
+
+} // namespace lib
+} // namespace icing
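A worked example of the base128-plus-one encoding above, with a value small enough to check by hand:

    // value = 300 (binary 10 0101100, i.e. two 7-bit groups: 44 and 2)
    // iteration 1: (300 & 0x7F) + 1 = 45 (0x2D); value >>= 7 leaves 2
    // iteration 2: (2 & 0x7F) + 1 = 3 (0x03); value becomes 0, loop ends
    // => EncodeIntToCString(300) == "\x2D\x03" (least significant group first)
    // DecodeIntFromCString walks the bytes in reverse and undoes the +1.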
diff --git a/icing/util/encode-util.h b/icing/util/encode-util.h
new file mode 100644
index 0000000..5a31acb
--- /dev/null
+++ b/icing/util/encode-util.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_ENCODE_UTIL_H_
+#define ICING_UTIL_ENCODE_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace encode_util {
+
+// Converts an unsigned 64-bit integer to a C string that contains no 0-byte,
+// since C strings use the 0-byte as a terminator. This increases the size of
+// the encoded string from 8 bytes to 10 bytes at worst.
+//
+// Note that it is compatible with unsigned 32-bit integers, i.e. casting a
+// uint32_t to uint64_t with the same value and encoding it with this method
+// yields the same string.
+std::string EncodeIntToCString(uint64_t value);
+
+// Converts a C string (encoded from EncodeIntToCString()) to an unsigned 64-bit
+// integer.
+uint64_t DecodeIntFromCString(std::string_view encoded_str);
+
+} // namespace encode_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_ENCODE_UTIL_H_
diff --git a/icing/util/encode-util_test.cc b/icing/util/encode-util_test.cc
new file mode 100644
index 0000000..c6cb984
--- /dev/null
+++ b/icing/util/encode-util_test.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/encode-util.h"
+
+#include <cstdint>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace encode_util {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::SizeIs;
+
+TEST(EncodeUtilTest, IntCStringZeroConversion) {
+ uint64_t value = 0;
+ std::string encoded_str = EncodeIntToCString(value);
+
+ EXPECT_THAT(encoded_str, SizeIs(Gt(0)));
+ EXPECT_THAT(DecodeIntFromCString(encoded_str), Eq(value));
+}
+
+TEST(EncodeUtilTest, IntCStringConversionIsReversible) {
+ uint64_t value = 123456;
+ std::string encoded_str = EncodeIntToCString(value);
+ EXPECT_THAT(DecodeIntFromCString(encoded_str), Eq(value));
+}
+
+TEST(EncodeUtilTest, MultipleIntCStringConversionsAreReversible) {
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(25)), Eq(25));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(766)), Eq(766));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(2305)), Eq(2305));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(6922)), Eq(6922));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(62326)), Eq(62326));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(186985)), Eq(186985));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(560962)), Eq(560962));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1682893)), Eq(1682893));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(15146065)), Eq(15146065));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(136314613)),
+ Eq(136314613));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1226831545)),
+ Eq(1226831545));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(11041483933)),
+ Eq(11041483933));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(2683080596566)),
+ Eq(2683080596566));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(72443176107373)),
+ Eq(72443176107373));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(1955965754899162)),
+ Eq(1955965754899162));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(52811075382277465)),
+ Eq(52811075382277465));
+ EXPECT_THAT(DecodeIntFromCString(EncodeIntToCString(4277697105964474945)),
+ Eq(4277697105964474945));
+}
+
+TEST(EncodeUtilTest, MultipleValidEncodedCStringIntConversionsAreReversible) {
+ // Only valid encoded C strings (no zero bytes, length between 1 and 10) are
+ // reversible.
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("foo")), Eq("foo"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("bar")), Eq("bar"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("baz")), Eq("baz"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Icing")), Eq("Icing"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Google")), Eq("Google"));
+ EXPECT_THAT(EncodeIntToCString(DecodeIntFromCString("Youtube")),
+ Eq("Youtube"));
+}
+
+} // namespace
+
+} // namespace encode_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/fingerprint-util.cc b/icing/util/fingerprint-util.cc
new file mode 100644
index 0000000..0ea843f
--- /dev/null
+++ b/icing/util/fingerprint-util.cc
@@ -0,0 +1,48 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/fingerprint-util.h"
+
+namespace icing {
+namespace lib {
+
+namespace fingerprint_util {
+
+// Converts from a fingerprint to a fingerprint string.
+std::string GetFingerprintString(uint64_t fingerprint) {
+ std::string encoded_fprint;
+ // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in
+ // base128 and add 1 to make sure that no byte is '0'. This increases the
+ // size of the encoded_fprint from 8 bytes to 10 bytes at worst.
+ while (fingerprint) {
+ encoded_fprint.push_back((fingerprint & 0x7F) + 1);
+ fingerprint >>= 7;
+ }
+ return encoded_fprint;
+}
+
+uint64_t GetFingerprint(std::string_view fingerprint_string) {
+ uint64_t fprint = 0;
+ for (int i = fingerprint_string.length() - 1; i >= 0; --i) {
+ fprint <<= 7;
+ char c = fingerprint_string[i] - 1;
+ fprint |= (c & 0x7F);
+ }
+ return fprint;
+}
+
+} // namespace fingerprint_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/fingerprint-util.h b/icing/util/fingerprint-util.h
new file mode 100644
index 0000000..9e98617
--- /dev/null
+++ b/icing/util/fingerprint-util.h
@@ -0,0 +1,47 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_FINGERPRINT_UTIL_H_
+#define ICING_UTIL_FINGERPRINT_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+namespace fingerprint_util {
+
+// Converts from a fingerprint to a fingerprint string.
+std::string GetFingerprintString(uint64_t fingerprint);
+
+// Converts from a fingerprint string to a fingerprint.
+uint64_t GetFingerprint(std::string_view fingerprint_string);
+
+// A formatter to properly handle a string that is actually just a hash value.
+class FingerprintStringFormatter {
+ public:
+ std::string operator()(std::string_view fingerprint_string) {
+ uint64_t fingerprint = GetFingerprint(fingerprint_string);
+ return std::to_string(fingerprint);
+ }
+};
+
+} // namespace fingerprint_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_FINGERPRINT_UTIL_H_
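A minimal round-trip sketch of these helpers (the key string is illustrative; tc3farmhash::Fingerprint64 is the hash used by the tests below):

    uint64_t fprint = tc3farmhash::Fingerprint64("namespace#uri");
    std::string trie_key = GetFingerprintString(fprint);  // contains no 0-bytes
    assert(GetFingerprint(trie_key) == fprint);           // reversible
    // Decimal rendering for debug output:
    std::string readable = FingerprintStringFormatter()(trie_key);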
diff --git a/icing/util/fingerprint-util_test.cc b/icing/util/fingerprint-util_test.cc
new file mode 100644
index 0000000..948c75a
--- /dev/null
+++ b/icing/util/fingerprint-util_test.cc
@@ -0,0 +1,75 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/fingerprint-util.h"
+
+#include <cstdint>
+#include <limits>
+
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace fingerprint_util {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(FingerprintUtilTest, ConversionIsReversible) {
+ std::string str = "foo-bar-baz";
+ uint64_t fprint = tc3farmhash::Fingerprint64(str);
+ std::string fprint_string = GetFingerprintString(fprint);
+ EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint));
+}
+
+TEST(FingerprintUtilTest, ZeroConversionIsReversible) {
+ uint64_t fprint = 0;
+ std::string fprint_string = GetFingerprintString(fprint);
+ EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint));
+}
+
+TEST(FingerprintUtilTest, MultipleConversionsAreReversible) {
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(25)), Eq(25));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(766)), Eq(766));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(2305)), Eq(2305));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(6922)), Eq(6922));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(62326)), Eq(62326));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(186985)), Eq(186985));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(560962)), Eq(560962));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(1682893)), Eq(1682893));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(15146065)), Eq(15146065));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(136314613)), Eq(136314613));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(1226831545)), Eq(1226831545));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(11041483933)),
+ Eq(11041483933));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(2683080596566)),
+ Eq(2683080596566));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(72443176107373)),
+ Eq(72443176107373));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(1955965754899162)),
+ Eq(1955965754899162));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(52811075382277465)),
+ Eq(52811075382277465));
+ EXPECT_THAT(GetFingerprint(GetFingerprintString(4277697105964474945)),
+ Eq(4277697105964474945));
+}
+
+} // namespace
+
+} // namespace fingerprint_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/i18n-utils.cc b/icing/util/i18n-utils.cc
index 9cf992f..ada9ef2 100644
--- a/icing/util/i18n-utils.cc
+++ b/icing/util/i18n-utils.cc
@@ -38,7 +38,7 @@ namespace {
// (https://www.fileformat.info/info/unicode/category/index.htm). The set of
// characters that are regarded as punctuation is not the same for std::ispunct
// and u_ispunct.
-const std::string ascii_icu_punctuation = "!\"#%&'*,./:;?@\\_-([{}])";
+constexpr std::string_view kAsciiIcuPunctuation = "!\"#%&'*,./:;?@\\_-([{}])";
} // namespace
@@ -99,22 +99,25 @@ void SafeTruncateUtf8(std::string* str, int truncate_to_length) {
return;
}
- while (truncate_to_length > 0) {
- if (IsLeadUtf8Byte(str->at(truncate_to_length))) {
- str->resize(truncate_to_length);
- return;
+ str->resize(SafeTruncateUtf8Length(str->c_str(), truncate_to_length));
+}
+
+int SafeTruncateUtf8Length(const char* str, int desired_length) {
+ while (desired_length > 0) {
+ if (IsLeadUtf8Byte(str[desired_length])) {
+ break;
}
- truncate_to_length--;
+ --desired_length;
}
-
- // Truncates to an empty string
- str->resize(0);
+ return desired_length;
}
bool IsAscii(char c) { return U8_IS_SINGLE((uint8_t)c); }
bool IsAscii(UChar32 c) { return U8_LENGTH(c) == 1; }
+bool IsAlphaNumeric(UChar32 c) { return u_isalnum(c); }
+
int GetUtf8Length(UChar32 c) { return U8_LENGTH(c); }
int GetUtf16Length(UChar32 c) { return U16_LENGTH(c); }
@@ -126,7 +129,7 @@ bool IsPunctuationAt(std::string_view input, int position, int* char_len_out) {
if (char_len_out != nullptr) {
*char_len_out = 1;
}
- return ascii_icu_punctuation.find(input[position]) != std::string::npos;
+ return kAsciiIcuPunctuation.find(input[position]) != std::string_view::npos;
}
UChar32 c = GetUChar32At(input.data(), input.length(), position);
if (char_len_out != nullptr) {
@@ -155,7 +158,7 @@ void AppendUchar32ToUtf8(std::string* utf8_string, UChar32 uchar) {
uint8_t utf8_buffer[4]; // U8_APPEND writes 0 to 4 bytes
int utf8_index = 0;
- UBool has_error = FALSE;
+ UBool has_error = false;
// utf8_index is advanced to the end of the contents if successful
U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
diff --git a/icing/util/i18n-utils.h b/icing/util/i18n-utils.h
index e103bab..491df6b 100644
--- a/icing/util/i18n-utils.h
+++ b/icing/util/i18n-utils.h
@@ -50,6 +50,13 @@ libtextclassifier3::StatusOr<std::u16string> Utf8ToUtf16(
// Returns the char at the given position.
UChar32 GetUChar32At(const char* data, int length, int position);
+// Returns the safe position to truncate a UTF8 string at so that multi-byte
+// UTF8 characters are not cut in the middle. The returned value will always be
+// 0 <= val <= desired_length.
+//
+// REQUIRES: 0 <= desired_length < strlen(str)
+int SafeTruncateUtf8Length(const char* str, int desired_length);
+
// Safely truncates a UTF8 string so that multi-byte UTF8 characters are not cut
// in the middle. The string will be truncated in place.
void SafeTruncateUtf8(std::string* str, int truncate_to_length);
@@ -60,6 +67,9 @@ bool IsAscii(char c);
// Checks if the Unicode char is within ASCII range.
bool IsAscii(UChar32 c);
+// Checks if the Unicode char is alphanumeric.
+bool IsAlphaNumeric(UChar32 c);
+
// Returns how many code units (char) are used for the UTF-8 encoding of this
// Unicode character. Returns 0 if not valid.
int GetUtf8Length(UChar32 c);
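A short sketch of the truncation contract on a multi-byte string ('é' is the two UTF-8 bytes 0xC3 0xA9):

    const char* str = "héllo";  // bytes: 'h', 0xC3, 0xA9, 'l', 'l', 'o'
    // Cutting at byte 2 would split 'é' (byte 2 is a continuation byte), so
    // the function backs up until the first dropped byte starts a character:
    int len = i18n_utils::SafeTruncateUtf8Length(str, /*desired_length=*/2);  // returns 1, keeping "h"
    len = i18n_utils::SafeTruncateUtf8Length(str, /*desired_length=*/3);      // returns 3, keeping "hé"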
diff --git a/icing/util/logging.cc b/icing/util/logging.cc
new file mode 100644
index 0000000..f60526b
--- /dev/null
+++ b/icing/util/logging.cc
@@ -0,0 +1,125 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/logging.h"
+
+#include <atomic>
+#include <exception>
+#include <string_view>
+
+#include "icing/proto/debug.pb.h"
+#include "icing/util/logging_raw.h"
+
+namespace icing {
+namespace lib {
+namespace {
+// Returns pointer to beginning of last /-separated token from file_name.
+// file_name should be a pointer to a zero-terminated array of chars.
+// E.g., "foo/bar.cc" -> "bar.cc", "foo/" -> "", "foo" -> "foo".
+const char *JumpToBasename(const char *file_name) {
+ if (file_name == nullptr) {
+ return nullptr;
+ }
+
+ // Index of the last '/' separator, or npos if there is none.
+ size_t last_token_start = std::string_view(file_name).find_last_of('/');
+ if (last_token_start == std::string_view::npos) {
+ return file_name;
+ }
+ return file_name + last_token_start + 1;
+}
+
+// Calculate the logging level value based on severity and verbosity.
+constexpr uint32_t CalculateLoggingLevel(LogSeverity::Code severity,
+ uint16_t verbosity) {
+ uint32_t logging_level = static_cast<uint16_t>(severity);
+ logging_level = (logging_level << 16) | verbosity;
+ return logging_level;
+}
+
+#if defined(ICING_DEBUG_LOGGING)
+#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::VERBOSE, 1)
+#else
+#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::INFO, 0)
+#endif
+
+// The current global logging level for Icing, which controls which logs are
+// printed based on severity and verbosity.
+//
+// This needs to be global so that it can be easily accessed from ICING_LOG and
+// ICING_VLOG macros spread throughout the entire code base.
+//
+// The first 16 bits represent the minimal log severity.
+// The last 16 bits represent the current verbosity.
+std::atomic<uint32_t> global_logging_level = DEFAULT_LOGGING_LEVEL;
+
+} // namespace
+
+// Whether we should log according to the current logging level.
+bool ShouldLog(LogSeverity::Code severity, int16_t verbosity) {
+ if (verbosity < 0) {
+ return false;
+ }
+ // Using the relaxed order for better performance because we only need to
+ // guarantee the atomicity for this specific statement, without the need to
+ // worry about reordering.
+ uint32_t curr_logging_level =
+ global_logging_level.load(std::memory_order_relaxed);
+ // If severity is below the threshold that was set, don't log.
+ if (static_cast<uint16_t>(severity) < (curr_logging_level >> 16)) {
+ return false;
+ }
+ if (severity == LogSeverity::VERBOSE) {
+ // Return whether the verbosity is within the currently set verbose level.
+ return verbosity <= (curr_logging_level & 0xffff);
+ }
+ return true;
+}
+
+bool SetLoggingLevel(LogSeverity::Code severity, int16_t verbosity) {
+ if (verbosity < 0) {
+ return false;
+ }
+ if (severity > LogSeverity::VERBOSE && verbosity > 0) {
+ return false;
+ }
+ // Using the relaxed order for better performance because we only need to
+ // guarantee the atomicity for this specific statement, without the need to
+ // worry about reordering.
+ global_logging_level.store(CalculateLoggingLevel(severity, verbosity),
+ std::memory_order_relaxed);
+ return true;
+}
+
+LogMessage::LogMessage(LogSeverity::Code severity, uint16_t verbosity,
+ const char *file_name, int line_number)
+ : severity_(severity),
+ verbosity_(verbosity),
+ should_log_(ShouldLog(severity_, verbosity_)),
+ stream_(should_log_) {
+ if (should_log_) {
+ stream_ << JumpToBasename(file_name) << ":" << line_number << ": ";
+ }
+}
+
+LogMessage::~LogMessage() {
+ if (should_log_) {
+ LowLevelLogging(severity_, kIcingLoggingTag, stream_.message);
+ }
+ if (severity_ == LogSeverity::FATAL) {
+ std::terminate(); // Will print a stacktrace (stdout or logcat).
+ }
+}
+} // namespace lib
+} // namespace icing
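To make the packing concrete, a worked sketch (assuming, for illustration, that debug.proto declares the severities in order with VERBOSE = 0 and INFO = 2; the authoritative values live in the proto):

    // CalculateLoggingLevel(INFO, 0)    == (2 << 16) | 0 == 0x00020000
    // CalculateLoggingLevel(VERBOSE, 1) == (0 << 16) | 1 == 0x00000001
    // ShouldLog() then unpacks:
    //   severity threshold = level >> 16, max verbosity = level & 0xffff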
diff --git a/icing/util/logging.h b/icing/util/logging.h
index 9d598fe..23280dc 100644
--- a/icing/util/logging.h
+++ b/icing/util/logging.h
@@ -15,14 +15,146 @@
#ifndef ICING_UTIL_LOGGING_H_
#define ICING_UTIL_LOGGING_H_
-#include "icing/text_classifier/lib3/utils/base/logging.h"
+#include <atomic>
+#include <cstdint>
+#include <string>
+#include "icing/proto/debug.pb.h"
+
+// This header provides base/logging.h style macros, ICING_LOG and ICING_VLOG,
+// for logging in various platforms. The macros use __android_log_write on
+// Android, and log to stdout/stderr on others. It also provides a function
+// SetLoggingLevel to control the log severity level for ICING_LOG and verbosity
+// for ICING_VLOG.
namespace icing {
namespace lib {
-// TODO(b/146903474) Add verbose level control
-#define ICING_VLOG(verbose_level) TC3_VLOG(verbose_level)
-#define ICING_LOG(severity) TC3_LOG(severity)
+// Whether we should log according to the current logging level.
+// The function will always return false when verbosity is negative.
+bool ShouldLog(LogSeverity::Code severity, int16_t verbosity = 0);
+
+// Set the minimal logging severity to be enabled, and the verbose level to see
+// from the logs.
+// Return false if severity is set higher than VERBOSE but verbosity is not 0.
+// The function will always return false when verbosity is negative.
+bool SetLoggingLevel(LogSeverity::Code severity, int16_t verbosity = 0);
+
+// A tiny code footprint string stream for assembling log messages.
+struct LoggingStringStream {
+ explicit LoggingStringStream(bool should_log) : should_log_(should_log) {}
+ LoggingStringStream& stream() { return *this; }
+
+ std::string message;
+ const bool should_log_;
+};
+
+template <typename T>
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const T& entry) {
+ if (stream.should_log_) {
+ stream.message.append(std::to_string(entry));
+ }
+ return stream;
+}
+
+template <typename T>
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ T* const entry) {
+ if (stream.should_log_) {
+ stream.message.append(
+ std::to_string(reinterpret_cast<const uint64_t>(entry)));
+ }
+ return stream;
+}
+
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const char* message) {
+ if (stream.should_log_) {
+ stream.message.append(message);
+ }
+ return stream;
+}
+
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const std::string& message) {
+ if (stream.should_log_) {
+ stream.message.append(message);
+ }
+ return stream;
+}
+
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ std::string_view message) {
+ if (stream.should_log_) {
+ stream.message.append(message);
+ }
+ return stream;
+}
+
+template <typename T1, typename T2>
+inline LoggingStringStream& operator<<(LoggingStringStream& stream,
+ const std::pair<T1, T2>& entry) {
+ if (stream.should_log_) {
+ stream << "(" << entry.first << ", " << entry.second << ")";
+ }
+ return stream;
+}
+
+// The class that does all the work behind our ICING_LOG(severity) macros. Each
+// ICING_LOG(severity) << obj1 << obj2 << ...; logging statement creates a
+// LogMessage temporary object containing a stringstream. Each operator<< adds
+// info to that stringstream and the LogMessage destructor performs the actual
+// logging. The reason this works is that in C++, "all temporary objects are
+// destroyed as the last step in evaluating the full-expression that (lexically)
+// contains the point where they were created." For more info, see
+// http://en.cppreference.com/w/cpp/language/lifetime. Hence, the destructor is
+// invoked after the last << from that logging statement.
+class LogMessage {
+ public:
+ LogMessage(LogSeverity::Code severity, uint16_t verbosity,
+ const char* file_name, int line_number) __attribute__((noinline));
+
+ ~LogMessage() __attribute__((noinline));
+
+ // Returns the stream associated with the logger object.
+ LoggingStringStream& stream() { return stream_; }
+
+ private:
+ const LogSeverity::Code severity_;
+ const uint16_t verbosity_;
+ const bool should_log_;
+
+ // Stream that "prints" all info into a string (not to a file). We construct
+ // here the entire logging message and next print it in one operation.
+ LoggingStringStream stream_;
+};
+
+inline constexpr char kIcingLoggingTag[] = "AppSearchIcing";
+
+// Define consts to make it easier to refer to log severities in code.
+constexpr ::icing::lib::LogSeverity::Code VERBOSE =
+ ::icing::lib::LogSeverity::VERBOSE;
+
+constexpr ::icing::lib::LogSeverity::Code DBG = ::icing::lib::LogSeverity::DBG;
+
+constexpr ::icing::lib::LogSeverity::Code INFO =
+ ::icing::lib::LogSeverity::INFO;
+
+constexpr ::icing::lib::LogSeverity::Code WARNING =
+ ::icing::lib::LogSeverity::WARNING;
+
+constexpr ::icing::lib::LogSeverity::Code ERROR =
+ ::icing::lib::LogSeverity::ERROR;
+
+constexpr ::icing::lib::LogSeverity::Code FATAL =
+ ::icing::lib::LogSeverity::FATAL;
+
+#define ICING_VLOG(verbose_level) \
+ ::icing::lib::LogMessage(VERBOSE, verbose_level, __FILE__, __LINE__).stream()
+
+#define ICING_LOG(severity) \
+ ::icing::lib::LogMessage(severity, /*verbosity=*/0, __FILE__, __LINE__) \
+ .stream()
} // namespace lib
} // namespace icing
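A small usage sketch of the new macros and level control (messages are illustrative):

    // Only WARNING and above are printed after this call.
    SetLoggingLevel(LogSeverity::WARNING);
    ICING_LOG(INFO) << "dropped: below the WARNING threshold";
    ICING_LOG(ERROR) << "printed: code=" << 42;

    // Enable VERBOSE logs with verbosity up to 2; ICING_VLOG(3) stays silent.
    SetLoggingLevel(LogSeverity::VERBOSE, /*verbosity=*/2);
    ICING_VLOG(1) << "printed";
    ICING_VLOG(3) << "dropped";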
diff --git a/icing/util/logging_raw.cc b/icing/util/logging_raw.cc
new file mode 100644
index 0000000..44dd000
--- /dev/null
+++ b/icing/util/logging_raw.cc
@@ -0,0 +1,104 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/logging_raw.h"
+
+#include <cstdio>
+#include <string>
+
+#include "icing/proto/debug.pb.h"
+
+// NOTE: this file contains two implementations: one for Android, one for all
+// other cases. We always build exactly one implementation.
+#if defined(__ANDROID__)
+
+// Compiled as part of Android.
+#include <android/log.h>
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Converts LogSeverity to level for __android_log_write.
+int GetAndroidLogLevel(LogSeverity::Code severity) {
+ switch (severity) {
+ case LogSeverity::VERBOSE:
+ return ANDROID_LOG_VERBOSE;
+ case LogSeverity::DBG:
+ return ANDROID_LOG_DEBUG;
+ case LogSeverity::INFO:
+ return ANDROID_LOG_INFO;
+ case LogSeverity::WARNING:
+ return ANDROID_LOG_WARN;
+ case LogSeverity::ERROR:
+ return ANDROID_LOG_ERROR;
+ case LogSeverity::FATAL:
+ return ANDROID_LOG_FATAL;
+ }
+}
+} // namespace
+
+void LowLevelLogging(LogSeverity::Code severity, const std::string& tag,
+ const std::string& message) {
+ const int android_log_level = GetAndroidLogLevel(severity);
+#if __ANDROID_API__ >= 30
+ if (!__android_log_is_loggable(android_log_level, tag.c_str(),
+ /*default_prio=*/ANDROID_LOG_INFO)) {
+ return;
+ }
+#endif // __ANDROID_API__ >= 30
+ __android_log_write(android_log_level, tag.c_str(), message.c_str());
+}
+
+} // namespace lib
+} // namespace icing
+
+#else // if defined(__ANDROID__)
+
+// Not on Android: implement LowLevelLogging to print to stderr (see below).
+namespace icing {
+namespace lib {
+
+namespace {
+// Converts LogSeverity to human-readable text.
+const char *LogSeverityToString(LogSeverity::Code severity) {
+ switch (severity) {
+ case LogSeverity::VERBOSE:
+ return "VERBOSE";
+ case LogSeverity::DBG:
+ return "DEBUG";
+ case LogSeverity::INFO:
+ return "INFO";
+ case LogSeverity::WARNING:
+ return "WARNING";
+ case LogSeverity::ERROR:
+ return "ERROR";
+ case LogSeverity::FATAL:
+ return "FATAL";
+ }
+}
+} // namespace
+
+void LowLevelLogging(LogSeverity::Code severity, const std::string& tag,
+ const std::string& message) {
+ // TODO(b/146903474): Do not log to stderr for logs other than FATAL and ERROR.
+ fprintf(stderr, "[%s] %s : %s\n", LogSeverityToString(severity), tag.c_str(),
+ message.c_str());
+ fflush(stderr);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // if defined(__ANDROID__)
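
LowLevelLogging is the single sink shared by both builds: on Android it forwards to __android_log_write (consulting __android_log_is_loggable first on API 30+), and everywhere else it prints to stderr and flushes. A hedged sketch of a direct call that bypasses the ICING_LOG filtering (tag and message are illustrative):

    #include "icing/util/logging_raw.h"

    void EmitDirectly() {
      // Goes straight to logcat on Android, to stderr elsewhere.
      icing::lib::LowLevelLogging(icing::lib::LogSeverity::WARNING,
                                  /*tag=*/"IcingExample",
                                  /*message=*/"raw warning message");
    }
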
diff --git a/icing/util/logging_raw.h b/icing/util/logging_raw.h
new file mode 100644
index 0000000..99dddb6
--- /dev/null
+++ b/icing/util/logging_raw.h
@@ -0,0 +1,34 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_LOGGING_RAW_H_
+#define ICING_UTIL_LOGGING_RAW_H_
+
+#include <string>
+
+#include "icing/proto/debug.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Low-level logging primitive. Logs a message, with the indicated log
+// severity. From android/log.h: "the tag normally corresponds to the component
+// that emits the log message, and should be reasonably small".
+void LowLevelLogging(LogSeverity::Code severity, const std::string& tag,
+ const std::string& message);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_LOGGING_RAW_H_
diff --git a/icing/util/logging_test.cc b/icing/util/logging_test.cc
new file mode 100644
index 0000000..eac018e
--- /dev/null
+++ b/icing/util/logging_test.cc
@@ -0,0 +1,158 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/logging.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/util/logging_raw.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::EndsWith;
+using ::testing::IsEmpty;
+
+TEST(LoggingTest, SetLoggingLevelWithInvalidArguments) {
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 1));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 1));
+
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 2));
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 2));
+
+ EXPECT_FALSE(SetLoggingLevel(LogSeverity::VERBOSE, -1));
+}
+
+TEST(LoggingTest, SetLoggingLevelTest) {
+ // Set to INFO
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+
+ // Set to WARNING
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+
+ // Set to DEBUG
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+}
+
+TEST(LoggingTest, VerboseLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, VerboseLoggingIsControlledByVerbosity) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 2));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 3));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 2));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1));
+
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 2));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1));
+
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 0));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 0));
+
+ // Negative verbosity is invalid.
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, -1));
+}
+
+TEST(LoggingTest, DebugLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_TRUE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, InfoLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_TRUE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, WarningLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_TRUE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, ErrorLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::ERROR));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_TRUE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, FatalLoggingTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::FATAL));
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1));
+ EXPECT_FALSE(ShouldLog(LogSeverity::DBG));
+ EXPECT_FALSE(ShouldLog(LogSeverity::INFO));
+ EXPECT_FALSE(ShouldLog(LogSeverity::WARNING));
+ EXPECT_FALSE(ShouldLog(LogSeverity::ERROR));
+ EXPECT_TRUE(ShouldLog(LogSeverity::FATAL));
+}
+
+TEST(LoggingTest, LoggingStreamTest) {
+ ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO));
+ // This one should be logged.
+ LoggingStringStream stream1 = (ICING_LOG(INFO) << "Hello"
+ << "World!");
+ EXPECT_THAT(stream1.message, EndsWith("HelloWorld!"));
+
+ // This one should not be logged, thus empty.
+ LoggingStringStream stream2 = (ICING_LOG(DBG) << "Hello"
+ << "World!");
+ EXPECT_THAT(stream2.message, IsEmpty());
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
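
Taken together, these tests pin down the filtering rule: a message passes when its severity is at least the configured level, and a VERBOSE message must additionally carry a verbosity no greater than the configured one. A compact restatement of a few expectations exercised above:

    // After SetLoggingLevel(LogSeverity::VERBOSE, /*verbosity=*/1):
    //   ShouldLog(LogSeverity::VERBOSE, /*verbosity=*/1) -> true
    //   ShouldLog(LogSeverity::VERBOSE, /*verbosity=*/2) -> false (too verbose)
    //   ShouldLog(LogSeverity::DBG)                      -> true  (higher severity)
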
diff --git a/icing/util/math-util.h b/icing/util/math-util.h
index fc11a09..3f2a69d 100644
--- a/icing/util/math-util.h
+++ b/icing/util/math-util.h
@@ -37,7 +37,7 @@ inline double SafeDivide(double first, double second) {
template <typename IntType>
static IntType RoundDownTo(IntType input_value, IntType rounding_value) {
static_assert(std::numeric_limits<IntType>::is_integer,
- "RoundUpTo() operation type is not integer");
+ "RoundDownTo() operation type is not integer");
if (input_value <= 0) {
return 0;
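
For reference, RoundDownTo rounds an integer down to a multiple of rounding_value, clamping non-positive inputs to 0 per the guard visible above. Illustrative values, assuming the conventional truncating (input / rounding) * rounding implementation that the assert message names:

    //   RoundDownTo(37, 8) -> 32
    //   RoundDownTo(40, 8) -> 40
    //   RoundDownTo(-5, 8) -> 0   (non-positive input clamps to 0)
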
diff --git a/icing/util/snippet-helpers.cc b/icing/util/snippet-helpers.cc
new file mode 100644
index 0000000..ca6f423
--- /dev/null
+++ b/icing/util/snippet-helpers.cc
@@ -0,0 +1,94 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/snippet-helpers.h"
+
+#include <algorithm>
+#include <string_view>
+#include <vector>
+
+#include "icing/proto/document.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/property-util.h"
+
+namespace icing {
+namespace lib {
+
+std::vector<std::string_view> GetWindows(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> windows;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ windows.push_back(content.substr(match.window_byte_position(),
+ match.window_byte_length()));
+ }
+ return windows;
+}
+
+std::vector<std::string_view> GetMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> matches;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ matches.push_back(content.substr(match.exact_match_byte_position(),
+ match.exact_match_byte_length()));
+ }
+ return matches;
+}
+
+std::vector<std::string_view> GetSubMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> matches;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ matches.push_back(content.substr(match.exact_match_byte_position(),
+ match.submatch_byte_length()));
+ }
+ return matches;
+}
+
+std::string_view GetString(const DocumentProto* document,
+ std::string_view property_path_expr) {
+ std::vector<std::string_view> properties =
+ property_util::SplitPropertyPathExpr(property_path_expr);
+ for (int i = 0; i < properties.size(); ++i) {
+ property_util::PropertyInfo property_info =
+ property_util::ParsePropertyNameExpr(properties.at(i));
+ if (property_info.index == property_util::kWildcardPropertyIndex) {
+ // Use index = 0 by default.
+ property_info.index = 0;
+ }
+
+ const PropertyProto* prop =
+ property_util::GetPropertyProto(*document, property_info.name);
+ if (prop == nullptr) {
+ // The requested property doesn't exist in the document. Return an empty string.
+ return "";
+ }
+ if (i == properties.size() - 1) {
+ // The last property. Get the requested string value.
+ if (prop->string_values_size() - 1 < property_info.index) {
+ // The requested string doesn't exist. Return an empty string.
+ return "";
+ }
+ return prop->string_values(property_info.index);
+ } else if (prop->document_values_size() - 1 < property_info.index) {
+ // The requested subproperty doesn't exist. Return an empty string.
+ return "";
+ } else {
+ // Go to the next subproperty.
+ document = &prop->document_values(property_info.index);
+ }
+ }
+ return "";
+}
+
+} // namespace lib
+} // namespace icing
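
Each helper slices content with byte offsets carried by SnippetMatchProto, so the returned string_views alias the caller's buffer rather than copying it. A minimal sketch with illustrative field values (the setters are assumed to mirror the getters used above):

    std::string content = "hello world, hello icing";
    SnippetProto::EntryProto entry;
    SnippetMatchProto* match = entry.add_snippet_matches();
    match->set_window_byte_position(0);
    match->set_window_byte_length(12);      // "hello world,"
    match->set_exact_match_byte_position(6);
    match->set_exact_match_byte_length(5);  // "world"
    // GetWindows(content, entry) -> {"hello world,"}
    // GetMatches(content, entry) -> {"world"}
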
diff --git a/icing/util/snippet-helpers.h b/icing/util/snippet-helpers.h
new file mode 100644
index 0000000..d7349ba
--- /dev/null
+++ b/icing/util/snippet-helpers.h
@@ -0,0 +1,60 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_SNIPPET_HELPERS_H_
+#define ICING_TESTING_SNIPPET_HELPERS_H_
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/proto/document.pb.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Retrieve pointer to the PropertyProto identified by property_name.
+// Returns nullptr if no such property exists.
+//
+// NOTE: This function does not handle nesting or indexes. "foo.bar" will return
+// a nullptr even if the document contains a property called "foo" that contains a
+// subproperty called "bar".
+const PropertyProto* GetProperty(const DocumentProto& document,
+ const std::string& property_name);
+
+// Retrieves all windows defined by the snippet_proto for the content.
+std::vector<std::string_view> GetWindows(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
+
+// Retrieves all matches defined by the snippet_proto for the content.
+std::vector<std::string_view> GetMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
+
+// Retrieves all submatches defined by the snippet_proto for the content.
+std::vector<std::string_view> GetSubMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
+
+// Retrieves the string value held in the document corresponding to the
+// property_path_expr.
+// Example:
+// - GetString(doc, "foo") will retrieve the first string value in the
+// property "foo" in document or an empty string if it doesn't exist.
+// - GetString(doc, "foo[1].bar[2]") will retrieve the third string value in
+// the subproperty "bar" of the second document value in the property "foo".
+std::string_view GetString(const DocumentProto* document,
+ std::string_view property_path_expr);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_SNIPPET_HELPERS_H_
diff --git a/icing/util/tokenized-document.cc b/icing/util/tokenized-document.cc
new file mode 100644
index 0000000..19aaddf
--- /dev/null
+++ b/icing/util/tokenized-document.cc
@@ -0,0 +1,92 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/tokenized-document.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/util/document-validator.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+libtextclassifier3::StatusOr<std::vector<TokenizedSection>> Tokenize(
+ const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter,
+ const std::vector<Section<std::string_view>>& string_sections) {
+ std::vector<TokenizedSection> tokenized_string_sections;
+ for (const Section<std::string_view>& section : string_sections) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer> tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ section.metadata.tokenizer, language_segmenter));
+ std::vector<std::string_view> token_sequence;
+ for (std::string_view subcontent : section.content) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr,
+ tokenizer->Tokenize(subcontent));
+ while (itr->Advance()) {
+ std::vector<Token> batch_tokens = itr->GetTokens();
+ for (const Token& token : batch_tokens) {
+ token_sequence.push_back(token.text);
+ }
+ }
+ }
+ tokenized_string_sections.emplace_back(SectionMetadata(section.metadata),
+ std::move(token_sequence));
+ }
+
+ return tokenized_string_sections;
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<TokenizedDocument>
+TokenizedDocument::Create(const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter,
+ DocumentProto document) {
+ DocumentValidator validator(schema_store);
+ ICING_RETURN_IF_ERROR(validator.Validate(document));
+
+ ICING_ASSIGN_OR_RETURN(SectionGroup section_group,
+ schema_store->ExtractSections(document));
+
+ ICING_ASSIGN_OR_RETURN(JoinablePropertyGroup joinable_property_group,
+ schema_store->ExtractJoinableProperties(document));
+
+ // Tokenize string sections.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<TokenizedSection> tokenized_string_sections,
+ Tokenize(schema_store, language_segmenter,
+ section_group.string_sections));
+
+ return TokenizedDocument(std::move(document),
+ std::move(tokenized_string_sections),
+ std::move(section_group.integer_sections),
+ std::move(joinable_property_group));
+}
+
+} // namespace lib
+} // namespace icing
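
Create() validates the document, asks the schema store for its sections and joinable properties, then flattens each string section into one token_sequence using that section's configured tokenizer. For example, with the plain tokenizer (mirroring the test expectations further below):

    // A repeated string property {"test foo", "test bar"} tokenizes into the
    // single flat sequence {"test", "foo", "test", "bar"}, and those four
    // tokens count toward num_string_tokens().
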
diff --git a/icing/util/tokenized-document.h b/icing/util/tokenized-document.h
new file mode 100644
index 0000000..7cc34e3
--- /dev/null
+++ b/icing/util/tokenized-document.h
@@ -0,0 +1,92 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_TOKENIZED_DOCUMENT_H_
+#define ICING_STORE_TOKENIZED_DOCUMENT_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/language-segmenter.h"
+
+namespace icing {
+namespace lib {
+
+struct TokenizedSection {
+ SectionMetadata metadata;
+ std::vector<std::string_view> token_sequence;
+
+ TokenizedSection(SectionMetadata&& metadata_in,
+ std::vector<std::string_view>&& token_sequence_in)
+ : metadata(std::move(metadata_in)),
+ token_sequence(std::move(token_sequence_in)) {}
+};
+
+class TokenizedDocument {
+ public:
+ static libtextclassifier3::StatusOr<TokenizedDocument> Create(
+ const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter, DocumentProto document);
+
+ const DocumentProto& document() const { return document_; }
+
+ int32_t num_string_tokens() const {
+ int32_t num_string_tokens = 0;
+ for (const TokenizedSection& section : tokenized_string_sections_) {
+ num_string_tokens += section.token_sequence.size();
+ }
+ return num_string_tokens;
+ }
+
+ const std::vector<TokenizedSection>& tokenized_string_sections() const {
+ return tokenized_string_sections_;
+ }
+
+ const std::vector<Section<int64_t>>& integer_sections() const {
+ return integer_sections_;
+ }
+
+ const std::vector<JoinableProperty<std::string_view>>&
+ qualified_id_join_properties() const {
+ return joinable_property_group_.qualified_id_properties;
+ }
+
+ private:
+ // Use TokenizedDocument::Create() to instantiate.
+ explicit TokenizedDocument(
+ DocumentProto&& document,
+ std::vector<TokenizedSection>&& tokenized_string_sections,
+ std::vector<Section<int64_t>>&& integer_sections,
+ JoinablePropertyGroup&& joinable_property_group)
+ : document_(std::move(document)),
+ tokenized_string_sections_(std::move(tokenized_string_sections)),
+ integer_sections_(std::move(integer_sections)),
+ joinable_property_group_(std::move(joinable_property_group)) {}
+
+ DocumentProto document_;
+ std::vector<TokenizedSection> tokenized_string_sections_;
+ std::vector<Section<int64_t>> integer_sections_;
+ JoinablePropertyGroup joinable_property_group_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_TOKENIZED_DOCUMENT_H_
diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc
new file mode 100644
index 0000000..7c97776
--- /dev/null
+++ b/icing/util/tokenized-document_test.cc
@@ -0,0 +1,455 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/tokenized-document.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+// Schema types.
+static constexpr std::string_view kFakeType = "FakeType";
+
+// Indexable properties and section ids. A section id is determined by the
+// lexicographical order of its indexable property path.
+static constexpr std::string_view kIndexableIntegerProperty1 =
+ "indexableInteger1";
+static constexpr std::string_view kIndexableIntegerProperty2 =
+ "indexableInteger2";
+static constexpr std::string_view kStringExactProperty = "stringExact";
+static constexpr std::string_view kStringPrefixProperty = "stringPrefix";
+
+static constexpr SectionId kIndexableInteger1SectionId = 0;
+static constexpr SectionId kIndexableInteger2SectionId = 1;
+static constexpr SectionId kStringExactSectionId = 2;
+static constexpr SectionId kStringPrefixSectionId = 3;
+
+// Joinable properties and joinable property ids. A joinable property id is
+// determined by the lexicographical order of its joinable property path.
+static constexpr std::string_view kQualifiedId1 = "qualifiedId1";
+static constexpr std::string_view kQualifiedId2 = "qualifiedId2";
+
+static constexpr JoinablePropertyId kQualifiedId1JoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
+
+const SectionMetadata kIndexableInteger1SectionMetadata(
+ kIndexableInteger1SectionId, TYPE_INT64, TOKENIZER_NONE, TERM_MATCH_UNKNOWN,
+ NUMERIC_MATCH_RANGE, std::string(kIndexableIntegerProperty1));
+
+const SectionMetadata kIndexableInteger2SectionMetadata(
+ kIndexableInteger2SectionId, TYPE_INT64, TOKENIZER_NONE, TERM_MATCH_UNKNOWN,
+ NUMERIC_MATCH_RANGE, std::string(kIndexableIntegerProperty2));
+
+const SectionMetadata kStringExactSectionMetadata(
+ kStringExactSectionId, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT,
+ NUMERIC_MATCH_UNKNOWN, std::string(kStringExactProperty));
+
+const SectionMetadata kStringPrefixSectionMetadata(
+ kStringPrefixSectionId, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_PREFIX,
+ NUMERIC_MATCH_UNKNOWN, std::string(kStringPrefixProperty));
+
+const JoinablePropertyMetadata kQualifiedId1JoinablePropertyMetadata(
+ kQualifiedId1JoinablePropertyId, TYPE_STRING,
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID, std::string(kQualifiedId1));
+
+const JoinablePropertyMetadata kQualifiedId2JoinablePropertyMetadata(
+ kQualifiedId2JoinablePropertyId, TYPE_STRING,
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID, std::string(kQualifiedId2));
+
+// Other non-indexable/joinable properties.
+constexpr std::string_view kUnindexedStringProperty = "unindexedString";
+constexpr std::string_view kUnindexedIntegerProperty = "unindexedInteger";
+
+class TokenizedDocumentTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kUnindexedStringProperty)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kUnindexedIntegerProperty)
+ .SetDataType(TYPE_INT64)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kIndexableIntegerProperty1)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kIndexableIntegerProperty2)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kStringExactProperty)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kStringPrefixProperty)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kQualifiedId1)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kQualifiedId2)
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ }
+
+ void TearDown() override {
+ schema_store_.reset();
+
+ // Check that the schema store directory is the *only* directory in
+ // test_dir_. In other words, ensure that all temporary directories have
+ // been properly cleaned up.
+ std::vector<std::string> sub_dirs;
+ ASSERT_TRUE(filesystem_.ListDirectory(test_dir_.c_str(), &sub_dirs));
+ ASSERT_THAT(sub_dirs, ElementsAre("schema_store"));
+
+ // Finally, clean everything up.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::string test_dir_;
+ std::string schema_store_dir_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
+TEST_F(TokenizedDocumentTest, CreateAll) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .AddStringProperty(std::string(kStringExactProperty), "test foo",
+ "test bar", "test baz")
+ .AddStringProperty(std::string(kStringPrefixProperty), "foo bar baz")
+ .AddInt64Property(std::string(kUnindexedIntegerProperty), 789)
+ .AddInt64Property(std::string(kIndexableIntegerProperty1), 1, 2, 3)
+ .AddInt64Property(std::string(kIndexableIntegerProperty2), 456)
+ .AddStringProperty(std::string(kQualifiedId1), "pkg$db/ns#uri1")
+ .AddStringProperty(std::string(kQualifiedId2), "pkg$db/ns#uri2")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(9));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.tokenized_string_sections().at(0).metadata,
+ Eq(kStringExactSectionMetadata));
+ EXPECT_THAT(
+ tokenized_document.tokenized_string_sections().at(0).token_sequence,
+ ElementsAre("test", "foo", "test", "bar", "test", "baz"));
+ EXPECT_THAT(tokenized_document.tokenized_string_sections().at(1).metadata,
+ Eq(kStringPrefixSectionMetadata));
+ EXPECT_THAT(
+ tokenized_document.tokenized_string_sections().at(1).token_sequence,
+ ElementsAre("foo", "bar", "baz"));
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.integer_sections().at(0).metadata,
+ Eq(kIndexableInteger1SectionMetadata));
+ EXPECT_THAT(tokenized_document.integer_sections().at(0).content,
+ ElementsAre(1, 2, 3));
+ EXPECT_THAT(tokenized_document.integer_sections().at(1).metadata,
+ Eq(kIndexableInteger2SectionMetadata));
+ EXPECT_THAT(tokenized_document.integer_sections().at(1).content,
+ ElementsAre(456));
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).metadata,
+ Eq(kQualifiedId1JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).values,
+ ElementsAre("pkg$db/ns#uri1"));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).metadata,
+ Eq(kQualifiedId2JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).values,
+ ElementsAre("pkg$db/ns#uri2"));
+}
+
+TEST_F(TokenizedDocumentTest, CreateNoIndexableIntegerProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddInt64Property(std::string(kUnindexedIntegerProperty), 789)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateMultipleIndexableIntegerProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddInt64Property(std::string(kUnindexedIntegerProperty), 789)
+ .AddInt64Property(std::string(kIndexableIntegerProperty1), 1, 2, 3)
+ .AddInt64Property(std::string(kIndexableIntegerProperty2), 456)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.integer_sections().at(0).metadata,
+ Eq(kIndexableInteger1SectionMetadata));
+ EXPECT_THAT(tokenized_document.integer_sections().at(0).content,
+ ElementsAre(1, 2, 3));
+ EXPECT_THAT(tokenized_document.integer_sections().at(1).metadata,
+ Eq(kIndexableInteger2SectionMetadata));
+ EXPECT_THAT(tokenized_document.integer_sections().at(1).content,
+ ElementsAre(456));
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateNoIndexableStringProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateMultipleIndexableStringProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .AddStringProperty(std::string(kStringExactProperty), "test foo",
+ "test bar", "test baz")
+ .AddStringProperty(std::string(kStringPrefixProperty), "foo bar baz")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(9));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.tokenized_string_sections().at(0).metadata,
+ Eq(kStringExactSectionMetadata));
+ EXPECT_THAT(
+ tokenized_document.tokenized_string_sections().at(0).token_sequence,
+ ElementsAre("test", "foo", "test", "bar", "test", "baz"));
+ EXPECT_THAT(tokenized_document.tokenized_string_sections().at(1).metadata,
+ Eq(kStringPrefixSectionMetadata));
+ EXPECT_THAT(
+ tokenized_document.tokenized_string_sections().at(1).token_sequence,
+ ElementsAre("foo", "bar", "baz"));
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateNoJoinQualifiedIdProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+}
+
+TEST_F(TokenizedDocumentTest, CreateMultipleJoinQualifiedIdProperties) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedStringProperty),
+ "hello world unindexed")
+ .AddStringProperty(std::string(kQualifiedId1), "pkg$db/ns#uri1")
+ .AddStringProperty(std::string(kQualifiedId2), "pkg$db/ns#uri2")
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+
+ EXPECT_THAT(tokenized_document.document(), EqualsProto(document));
+ EXPECT_THAT(tokenized_document.num_string_tokens(), Eq(0));
+
+ // string sections
+ EXPECT_THAT(tokenized_document.tokenized_string_sections(), IsEmpty());
+
+ // integer sections
+ EXPECT_THAT(tokenized_document.integer_sections(), IsEmpty());
+
+ // Qualified id join properties
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties(), SizeIs(2));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).metadata,
+ Eq(kQualifiedId1JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(0).values,
+ ElementsAre("pkg$db/ns#uri1"));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).metadata,
+ Eq(kQualifiedId2JoinablePropertyMetadata));
+ EXPECT_THAT(tokenized_document.qualified_id_join_properties().at(1).values,
+ ElementsAre("pkg$db/ns#uri2"));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/java/build.gradle b/java/build.gradle
deleted file mode 100644
index 206c74f..0000000
--- a/java/build.gradle
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2020 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-buildscript {
- boolean unbundleBuild = (new File('unbundled-build')).exists()
- repositories {
- maven { url '../../../prebuilts/androidx/external' }
- if (unbundleBuild) {
- jcenter()
- }
- }
- dependencies {
- classpath('gradle.plugin.com.google.protobuf:protobuf-gradle-plugin:0.8.8')
- classpath('org.anarres.jarjar:jarjar-gradle:1.0.1')
- }
-}
-
-apply plugin: 'java-library'
-apply plugin: 'com.google.protobuf'
-apply plugin: 'org.anarres.jarjar'
-apply plugin: 'idea'
-
-sourceSets {
- main {
- proto {
- srcDir '../proto'
- include '**/*.proto'
- }
- }
-}
-
-compileJava {
- sourceCompatibility = JavaVersion.VERSION_1_7
- targetCompatibility = JavaVersion.VERSION_1_7
-}
-
-dependencies {
- implementation('com.google.protobuf:protobuf-javalite:3.10.0')
-}
-
-protobuf {
- protoc {
- artifact = 'com.google.protobuf:protoc:3.10.0'
- }
-
- generateProtoTasks {
- all().each { task ->
- task.builtins {
- java {
- option 'lite'
- }
- }
- }
- }
-}
-
-jarjar.repackage('jarjarTask') {
- destinationName "icing-java-jarjar.jar"
- from 'com.google.protobuf:protobuf-javalite:3.10.0'
- from files(sourceSets.main.output.classesDirs)
- dependsOn sourceSets.main.output
- classRename 'com.google.protobuf.**', 'com.google.android.icing.protobuf.@1'
-}
-
-configurations {
- jarjarConf
-}
-
-artifacts {
- jarjarConf(jarjarTask.destinationPath) {
- name 'icing-java-jarjar'
- type 'jar'
- builtBy jarjarTask
- }
-}
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 3ac5eef..e73f16b 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -14,21 +14,29 @@
package com.google.android.icing;
-import android.util.Log;
import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import com.google.android.icing.proto.DebugInfoResultProto;
+import com.google.android.icing.proto.DebugInfoVerbosity;
import com.google.android.icing.proto.DeleteByNamespaceResultProto;
+import com.google.android.icing.proto.DeleteByQueryResultProto;
import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
import com.google.android.icing.proto.DeleteResultProto;
import com.google.android.icing.proto.DocumentProto;
+import com.google.android.icing.proto.GetAllNamespacesResultProto;
import com.google.android.icing.proto.GetOptimizeInfoResultProto;
import com.google.android.icing.proto.GetResultProto;
+import com.google.android.icing.proto.GetResultSpecProto;
import com.google.android.icing.proto.GetSchemaResultProto;
import com.google.android.icing.proto.GetSchemaTypeResultProto;
import com.google.android.icing.proto.IcingSearchEngineOptions;
import com.google.android.icing.proto.InitializeResultProto;
+import com.google.android.icing.proto.LogSeverity;
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
+import com.google.android.icing.proto.PersistType;
import com.google.android.icing.proto.PutResultProto;
+import com.google.android.icing.proto.ReportUsageResultProto;
import com.google.android.icing.proto.ResetResultProto;
import com.google.android.icing.proto.ResultSpecProto;
import com.google.android.icing.proto.SchemaProto;
@@ -36,354 +44,234 @@ import com.google.android.icing.proto.ScoringSpecProto;
import com.google.android.icing.proto.SearchResultProto;
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
-import com.google.android.icing.proto.StatusProto;
-import com.google.android.icing.protobuf.InvalidProtocolBufferException;
-
-/** Java wrapper to access native APIs in external/icing/icing/icing-search-engine.h */
-public final class IcingSearchEngine {
+import com.google.android.icing.proto.StorageInfoResultProto;
+import com.google.android.icing.proto.SuggestionResponse;
+import com.google.android.icing.proto.SuggestionSpecProto;
+import com.google.android.icing.proto.UsageReport;
+
+/**
+ * Java wrapper to access {@link IcingSearchEngineImpl}.
+ *
+ * <p>It converts byte arrays from {@link IcingSearchEngineImpl} into the corresponding protos.
+ *
+ * <p>If this instance has been closed, the instance is no longer usable.
+ *
+ * <p>Keep this class non-final so that it can be mocked in AppSearch.
+ *
+ * <p>NOTE: This class is NOT thread-safe.
+ */
+public class IcingSearchEngine implements IcingSearchEngineInterface {
private static final String TAG = "IcingSearchEngine";
+ private final IcingSearchEngineImpl icingSearchEngineImpl;
- private final long nativePointer;
+ /**
+ * @throws IllegalStateException if IcingSearchEngine fails to be created
+ */
+ public IcingSearchEngine(@NonNull IcingSearchEngineOptions options) {
+ icingSearchEngineImpl = new IcingSearchEngineImpl(options.toByteArray());
+ }
- static {
- // NOTE: This can fail with an UnsatisfiedLinkError
- System.loadLibrary("icing");
+ @Override
+ public void close() {
+ icingSearchEngineImpl.close();
}
- /** @throws IllegalStateException if IcingSearchEngine fails to be created */
- public IcingSearchEngine(@NonNull IcingSearchEngineOptions options) {
- nativePointer = nativeCreate(options.toByteArray());
- if (nativePointer == 0) {
- Log.e(TAG, "Failed to create IcingSearchEngine.");
- throw new IllegalStateException("Failed to create IcingSearchEngine.");
- }
+ @SuppressWarnings({"deprecation", "removal"}) // b/316643605
+ @Override
+ protected void finalize() throws Throwable {
+ icingSearchEngineImpl.close();
+ super.finalize();
}
@NonNull
+ @Override
public InitializeResultProto initialize() {
- byte[] initializeResultBytes = nativeInitialize(nativePointer);
- if (initializeResultBytes == null) {
- Log.e(TAG, "Received null InitializeResult from native.");
- return InitializeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return InitializeResultProto.parseFrom(initializeResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing InitializeResultProto.", e);
- return InitializeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ return IcingSearchEngineUtils.byteArrayToInitializeResultProto(
+ icingSearchEngineImpl.initialize());
}
@NonNull
+ @Override
public SetSchemaResultProto setSchema(@NonNull SchemaProto schema) {
return setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false);
}
@NonNull
+ @Override
public SetSchemaResultProto setSchema(
@NonNull SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments) {
- byte[] setSchemaResultBytes =
- nativeSetSchema(nativePointer, schema.toByteArray(), ignoreErrorsAndDeleteDocuments);
- if (setSchemaResultBytes == null) {
- Log.e(TAG, "Received null SetSchemaResultProto from native.");
- return SetSchemaResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return SetSchemaResultProto.parseFrom(setSchemaResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing SetSchemaResultProto.", e);
- return SetSchemaResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ return IcingSearchEngineUtils.byteArrayToSetSchemaResultProto(
+ icingSearchEngineImpl.setSchema(schema.toByteArray(), ignoreErrorsAndDeleteDocuments));
}
@NonNull
+ @Override
public GetSchemaResultProto getSchema() {
- byte[] getSchemaResultBytes = nativeGetSchema(nativePointer);
- if (getSchemaResultBytes == null) {
- Log.e(TAG, "Received null GetSchemaResultProto from native.");
- return GetSchemaResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return GetSchemaResultProto.parseFrom(getSchemaResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing GetSchemaResultProto.", e);
- return GetSchemaResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ return IcingSearchEngineUtils.byteArrayToGetSchemaResultProto(
+ icingSearchEngineImpl.getSchema());
}
@NonNull
+ @Override
public GetSchemaTypeResultProto getSchemaType(@NonNull String schemaType) {
- byte[] getSchemaTypeResultBytes = nativeGetSchemaType(nativePointer, schemaType);
- if (getSchemaTypeResultBytes == null) {
- Log.e(TAG, "Received null GetSchemaTypeResultProto from native.");
- return GetSchemaTypeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return GetSchemaTypeResultProto.parseFrom(getSchemaTypeResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing GetSchemaTypeResultProto.", e);
- return GetSchemaTypeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ return IcingSearchEngineUtils.byteArrayToGetSchemaTypeResultProto(
+ icingSearchEngineImpl.getSchemaType(schemaType));
}
@NonNull
+ @Override
public PutResultProto put(@NonNull DocumentProto document) {
- byte[] putResultBytes = nativePut(nativePointer, document.toByteArray());
- if (putResultBytes == null) {
- Log.e(TAG, "Received null PutResultProto from native.");
- return PutResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return PutResultProto.parseFrom(putResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing PutResultProto.", e);
- return PutResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ return IcingSearchEngineUtils.byteArrayToPutResultProto(
+ icingSearchEngineImpl.put(document.toByteArray()));
+ }
+
+ @NonNull
+ @Override
+ public GetResultProto get(
+ @NonNull String namespace, @NonNull String uri, @NonNull GetResultSpecProto getResultSpec) {
+ return IcingSearchEngineUtils.byteArrayToGetResultProto(
+ icingSearchEngineImpl.get(namespace, uri, getResultSpec.toByteArray()));
+ }
+
+ @NonNull
+ @Override
+ public ReportUsageResultProto reportUsage(@NonNull UsageReport usageReport) {
+ return IcingSearchEngineUtils.byteArrayToReportUsageResultProto(
+ icingSearchEngineImpl.reportUsage(usageReport.toByteArray()));
}
@NonNull
- public GetResultProto get(@NonNull String namespace, @NonNull String uri) {
- byte[] getResultBytes = nativeGet(nativePointer, namespace, uri);
- if (getResultBytes == null) {
- Log.e(TAG, "Received null GetResultProto from native.");
- return GetResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return GetResultProto.parseFrom(getResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing GetResultProto.", e);
- return GetResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public GetAllNamespacesResultProto getAllNamespaces() {
+ return IcingSearchEngineUtils.byteArrayToGetAllNamespacesResultProto(
+ icingSearchEngineImpl.getAllNamespaces());
}
@NonNull
+ @Override
public SearchResultProto search(
@NonNull SearchSpecProto searchSpec,
@NonNull ScoringSpecProto scoringSpec,
@NonNull ResultSpecProto resultSpec) {
- byte[] searchResultBytes =
- nativeSearch(
- nativePointer,
- searchSpec.toByteArray(),
- scoringSpec.toByteArray(),
- resultSpec.toByteArray());
- if (searchResultBytes == null) {
- Log.e(TAG, "Received null SearchResultProto from native.");
- return SearchResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return SearchResultProto.parseFrom(searchResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing SearchResultProto.", e);
- return SearchResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ return IcingSearchEngineUtils.byteArrayToSearchResultProto(
+ icingSearchEngineImpl.search(
+ searchSpec.toByteArray(), scoringSpec.toByteArray(), resultSpec.toByteArray()));
}
@NonNull
- public DeleteResultProto delete(@NonNull String namespace, @NonNull String uri) {
- byte[] deleteResultBytes = nativeDelete(nativePointer, namespace, uri);
- if (deleteResultBytes == null) {
- Log.e(TAG, "Received null DeleteResultProto from native.");
- return DeleteResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return DeleteResultProto.parseFrom(deleteResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing DeleteResultProto.", e);
- return DeleteResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public SearchResultProto getNextPage(long nextPageToken) {
+ return IcingSearchEngineUtils.byteArrayToSearchResultProto(
+ icingSearchEngineImpl.getNextPage(nextPageToken));
}
@NonNull
- public DeleteByNamespaceResultProto deleteByNamespace(@NonNull String namespace) {
- byte[] deleteByNamespaceResultBytes = nativeDeleteByNamespace(nativePointer, namespace);
- if (deleteByNamespaceResultBytes == null) {
- Log.e(TAG, "Received null DeleteByNamespaceResultProto from native.");
- return DeleteByNamespaceResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return DeleteByNamespaceResultProto.parseFrom(deleteByNamespaceResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing DeleteByNamespaceResultProto.", e);
- return DeleteByNamespaceResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public void invalidateNextPageToken(long nextPageToken) {
+ icingSearchEngineImpl.invalidateNextPageToken(nextPageToken);
}
@NonNull
- public DeleteBySchemaTypeResultProto deleteBySchemaType(@NonNull String schemaType) {
- byte[] deleteBySchemaTypeResultBytes = nativeDeleteBySchemaType(nativePointer, schemaType);
- if (deleteBySchemaTypeResultBytes == null) {
- Log.e(TAG, "Received null DeleteBySchemaTypeResultProto from native.");
- return DeleteBySchemaTypeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return DeleteBySchemaTypeResultProto.parseFrom(deleteBySchemaTypeResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing DeleteBySchemaTypeResultProto.", e);
- return DeleteBySchemaTypeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public DeleteResultProto delete(@NonNull String namespace, @NonNull String uri) {
+ return IcingSearchEngineUtils.byteArrayToDeleteResultProto(
+ icingSearchEngineImpl.delete(namespace, uri));
}
@NonNull
- public PersistToDiskResultProto persistToDisk() {
- byte[] persistToDiskResultBytes = nativePersistToDisk(nativePointer);
- if (persistToDiskResultBytes == null) {
- Log.e(TAG, "Received null PersistToDiskResultProto from native.");
- return PersistToDiskResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return PersistToDiskResultProto.parseFrom(persistToDiskResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing PersistToDiskResultProto.", e);
- return PersistToDiskResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public SuggestionResponse searchSuggestions(@NonNull SuggestionSpecProto suggestionSpec) {
+ return IcingSearchEngineUtils.byteArrayToSuggestionResponse(
+ icingSearchEngineImpl.searchSuggestions(suggestionSpec.toByteArray()));
}
@NonNull
- public OptimizeResultProto optimize() {
- byte[] optimizeResultBytes = nativeOptimize(nativePointer);
- if (optimizeResultBytes == null) {
- Log.e(TAG, "Received null OptimizeResultProto from native.");
- return OptimizeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return OptimizeResultProto.parseFrom(optimizeResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing OptimizeResultProto.", e);
- return OptimizeResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public DeleteByNamespaceResultProto deleteByNamespace(@NonNull String namespace) {
+ return IcingSearchEngineUtils.byteArrayToDeleteByNamespaceResultProto(
+ icingSearchEngineImpl.deleteByNamespace(namespace));
}
@NonNull
- public GetOptimizeInfoResultProto getOptimizeInfo() {
- byte[] getOptimizeInfoResultBytes = nativeGetOptimizeInfo(nativePointer);
- if (getOptimizeInfoResultBytes == null) {
- Log.e(TAG, "Received null GetOptimizeInfoResultProto from native.");
- return GetOptimizeInfoResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return GetOptimizeInfoResultProto.parseFrom(getOptimizeInfoResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e);
- return GetOptimizeInfoResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public DeleteBySchemaTypeResultProto deleteBySchemaType(@NonNull String schemaType) {
+ return IcingSearchEngineUtils.byteArrayToDeleteBySchemaTypeResultProto(
+ icingSearchEngineImpl.deleteBySchemaType(schemaType));
}
@NonNull
- public ResetResultProto reset() {
- byte[] resetResultBytes = nativeReset(nativePointer);
- if (resetResultBytes == null) {
- Log.e(TAG, "Received null ResetResultProto from native.");
- return ResetResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
-
- try {
- return ResetResultProto.parseFrom(resetResultBytes);
- } catch (InvalidProtocolBufferException e) {
- Log.e(TAG, "Error parsing ResetResultProto.", e);
- return ResetResultProto.newBuilder()
- .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
- .build();
- }
+ @Override
+ public DeleteByQueryResultProto deleteByQuery(@NonNull SearchSpecProto searchSpec) {
+ return deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ false);
}
- private static native long nativeCreate(byte[] icingSearchEngineOptionsBytes);
-
- private static native byte[] nativeInitialize(long nativePointer);
-
- private static native byte[] nativeSetSchema(
- long nativePointer, byte[] schemaBytes, boolean ignoreErrorsAndDeleteDocuments);
-
- private static native byte[] nativeGetSchema(long nativePointer);
+ @NonNull
+ @Override
+ public DeleteByQueryResultProto deleteByQuery(
+ @NonNull SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo) {
+ return IcingSearchEngineUtils.byteArrayToDeleteByQueryResultProto(
+ icingSearchEngineImpl.deleteByQuery(searchSpec.toByteArray(), returnDeletedDocumentInfo));
+ }
- private static native byte[] nativeGetSchemaType(long nativePointer, String schemaType);
+ @NonNull
+ @Override
+ public PersistToDiskResultProto persistToDisk(@NonNull PersistType.Code persistTypeCode) {
+ return IcingSearchEngineUtils.byteArrayToPersistToDiskResultProto(
+ icingSearchEngineImpl.persistToDisk(persistTypeCode.getNumber()));
+ }
- private static native byte[] nativePut(long nativePointer, byte[] documentBytes);
+ @NonNull
+ @Override
+ public OptimizeResultProto optimize() {
+ return IcingSearchEngineUtils.byteArrayToOptimizeResultProto(icingSearchEngineImpl.optimize());
+ }
- private static native byte[] nativeGet(long nativePointer, String namespace, String uri);
+ @NonNull
+ @Override
+ public GetOptimizeInfoResultProto getOptimizeInfo() {
+ return IcingSearchEngineUtils.byteArrayToGetOptimizeInfoResultProto(
+ icingSearchEngineImpl.getOptimizeInfo());
+ }
- private static native byte[] nativeSearch(
- long nativePointer, byte[] searchSpecBytes, byte[] scoringSpecBytes, byte[] resultSpecBytes);
+ @NonNull
+ @Override
+ public StorageInfoResultProto getStorageInfo() {
+ return IcingSearchEngineUtils.byteArrayToStorageInfoResultProto(
+ icingSearchEngineImpl.getStorageInfo());
+ }
- private static native byte[] nativeDelete(long nativePointer, String namespace, String uri);
+ @NonNull
+ @Override
+ public DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity) {
+ return IcingSearchEngineUtils.byteArrayToDebugInfoResultProto(
+ icingSearchEngineImpl.getDebugInfo(verbosity.getNumber()));
+ }
- private static native byte[] nativeDeleteByNamespace(long nativePointer, String namespace);
+ @NonNull
+ @Override
+ public ResetResultProto reset() {
+ return IcingSearchEngineUtils.byteArrayToResetResultProto(icingSearchEngineImpl.reset());
+ }
- private static native byte[] nativeDeleteBySchemaType(long nativePointer, String schemaType);
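+  /** Returns whether a log line of the given severity would be logged, assuming verbosity 0. */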
+ public static boolean shouldLog(LogSeverity.Code severity) {
+ return shouldLog(severity, (short) 0);
+ }
- private static native byte[] nativePersistToDisk(long nativePointer);
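+  /** Returns whether a log line of the given severity and verbosity would be logged. */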
+ public static boolean shouldLog(LogSeverity.Code severity, short verbosity) {
+ return IcingSearchEngineImpl.shouldLog((short) severity.getNumber(), verbosity);
+ }
- private static native byte[] nativeOptimize(long nativePointer);
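+  /** Sets the logging level to the given severity, at verbosity 0; returns true on success. */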
+ public static boolean setLoggingLevel(LogSeverity.Code severity) {
+ return setLoggingLevel(severity, (short) 0);
+ }
- private static native byte[] nativeGetOptimizeInfo(long nativePointer);
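+  /** Sets the logging level to the given severity and verbosity; returns true on success. */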
+ public static boolean setLoggingLevel(LogSeverity.Code severity, short verbosity) {
+ return IcingSearchEngineImpl.setLoggingLevel((short) severity.getNumber(), verbosity);
+ }
- private static native byte[] nativeReset(long nativePointer);
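+  /** Returns the tag the native library uses for logging, or null if it could not be retrieved. */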
+ @Nullable
+ public static String getLoggingTag() {
+ return IcingSearchEngineImpl.getLoggingTag();
+ }
}
diff --git a/java/src/com/google/android/icing/IcingSearchEngineImpl.java b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
new file mode 100644
index 0000000..3a00a5a
--- /dev/null
+++ b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
@@ -0,0 +1,331 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.android.icing;
+
+import android.util.Log;
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import java.io.Closeable;
+
+/**
+ * Java wrapper to access native APIs in external/icing/icing/icing-search-engine.h
+ *
+ * <p>If this instance has been closed, the instance is no longer usable.
+ *
+ * <p>Keep this class non-final so that it can be mocked in AppSearch.
+ *
+ * <p>NOTE: This class is NOT thread-safe.
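+ *
+ * <p>A minimal usage sketch, assuming {@code optionsBytes} holds a serialized
+ * IcingSearchEngineOptions (each call returns a serialized result proto, or null on failure):
+ *
+ * <pre>{@code
+ * try (IcingSearchEngineImpl impl = new IcingSearchEngineImpl(optionsBytes)) {
+ *   byte[] initializeResultBytes = impl.initialize();
+ *   // Parse with the matching result proto, e.g. InitializeResultProto.parseFrom(...).
+ * }
+ * }</pre>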
+ */
+public class IcingSearchEngineImpl implements Closeable {
+
+ private static final String TAG = "IcingSearchEngineImpl";
+
+ private long nativePointer;
+
+ private boolean closed = false;
+
+ static {
+ // NOTE: This can fail with an UnsatisfiedLinkError
+ System.loadLibrary("icing");
+ }
+
+ /**
+ * @throws IllegalStateException if IcingSearchEngineImpl fails to be created
+ */
+ public IcingSearchEngineImpl(@NonNull byte[] optionsBytes) {
+ nativePointer = nativeCreate(optionsBytes);
+ if (nativePointer == 0) {
+ Log.e(TAG, "Failed to create IcingSearchEngineImpl.");
+ throw new IllegalStateException("Failed to create IcingSearchEngineImpl.");
+ }
+ }
+
+ private void throwIfClosed() {
+ if (closed) {
+ throw new IllegalStateException("Trying to use a closed IcingSearchEngineImpl instance.");
+ }
+ }
+
+ @Override
+ public void close() {
+ if (closed) {
+ return;
+ }
+
+ if (nativePointer != 0) {
+ nativeDestroy(this);
+ }
+ nativePointer = 0;
+ closed = true;
+ }
+
+ @SuppressWarnings({"deprecation", "removal"}) // b/316643605
+ @Override
+ protected void finalize() throws Throwable {
+ close();
+ super.finalize();
+ }
+
+ @Nullable
+ public byte[] initialize() {
+ throwIfClosed();
+ return nativeInitialize(this);
+ }
+
+ @Nullable
+ public byte[] setSchema(@NonNull byte[] schemaBytes) {
+ return setSchema(schemaBytes, /* ignoreErrorsAndDeleteDocuments= */ false);
+ }
+
+ @Nullable
+ public byte[] setSchema(@NonNull byte[] schemaBytes, boolean ignoreErrorsAndDeleteDocuments) {
+ throwIfClosed();
+ return nativeSetSchema(this, schemaBytes, ignoreErrorsAndDeleteDocuments);
+ }
+
+ @Nullable
+ public byte[] getSchema() {
+ throwIfClosed();
+ return nativeGetSchema(this);
+ }
+
+ @Nullable
+ public byte[] getSchemaType(@NonNull String schemaType) {
+ throwIfClosed();
+ return nativeGetSchemaType(this, schemaType);
+ }
+
+ @Nullable
+ public byte[] put(@NonNull byte[] documentBytes) {
+ throwIfClosed();
+ return nativePut(this, documentBytes);
+ }
+
+ @Nullable
+ public byte[] get(
+ @NonNull String namespace, @NonNull String uri, @NonNull byte[] getResultSpecBytes) {
+ throwIfClosed();
+ return nativeGet(this, namespace, uri, getResultSpecBytes);
+ }
+
+ @Nullable
+ public byte[] reportUsage(@NonNull byte[] usageReportBytes) {
+ throwIfClosed();
+ return nativeReportUsage(this, usageReportBytes);
+ }
+
+ @Nullable
+ public byte[] getAllNamespaces() {
+ throwIfClosed();
+ return nativeGetAllNamespaces(this);
+ }
+
+ @Nullable
+ public byte[] search(
+ @NonNull byte[] searchSpecBytes,
+ @NonNull byte[] scoringSpecBytes,
+ @NonNull byte[] resultSpecBytes) {
+ throwIfClosed();
+
+ // Note that on Android System.currentTimeMillis() is the standard "wall" clock and can be set
+ // by the user or the phone network so the time may jump backwards or forwards unpredictably.
+ // This could lead to inaccurate final JNI latency calculations or unexpected negative numbers
+ // in the case where the phone time is changed while sending data across JNI layers.
+ // However these occurrences should be very rare, so we will keep usage of
+ // System.currentTimeMillis() due to the lack of better time functions that can provide a
+ // consistent timestamp across all platforms.
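+    // (A monotonic clock such as android.os.SystemClock.elapsedRealtime() would avoid such
+    // jumps, but it is boot-relative and has no portable equivalent on the native side.)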
+ long javaToNativeStartTimestampMs = System.currentTimeMillis();
+ return nativeSearch(
+ this, searchSpecBytes, scoringSpecBytes, resultSpecBytes, javaToNativeStartTimestampMs);
+ }
+
+ @Nullable
+ public byte[] getNextPage(long nextPageToken) {
+ throwIfClosed();
+ return nativeGetNextPage(this, nextPageToken, System.currentTimeMillis());
+ }
+
+  public void invalidateNextPageToken(long nextPageToken) {
+ throwIfClosed();
+ nativeInvalidateNextPageToken(this, nextPageToken);
+ }
+
+ @Nullable
+ public byte[] delete(@NonNull String namespace, @NonNull String uri) {
+ throwIfClosed();
+ return nativeDelete(this, namespace, uri);
+ }
+
+ @Nullable
+ public byte[] searchSuggestions(@NonNull byte[] suggestionSpecBytes) {
+ throwIfClosed();
+ return nativeSearchSuggestions(this, suggestionSpecBytes);
+ }
+
+ @Nullable
+ public byte[] deleteByNamespace(@NonNull String namespace) {
+ throwIfClosed();
+ return nativeDeleteByNamespace(this, namespace);
+ }
+
+ @Nullable
+ public byte[] deleteBySchemaType(@NonNull String schemaType) {
+ throwIfClosed();
+ return nativeDeleteBySchemaType(this, schemaType);
+ }
+
+ @Nullable
+ public byte[] deleteByQuery(@NonNull byte[] searchSpecBytes) {
+ return deleteByQuery(searchSpecBytes, /* returnDeletedDocumentInfo= */ false);
+ }
+
+ @Nullable
+ public byte[] deleteByQuery(@NonNull byte[] searchSpecBytes, boolean returnDeletedDocumentInfo) {
+ throwIfClosed();
+ return nativeDeleteByQuery(this, searchSpecBytes, returnDeletedDocumentInfo);
+ }
+
+ @Nullable
+ public byte[] persistToDisk(int persistTypeCode) {
+ throwIfClosed();
+ return nativePersistToDisk(this, persistTypeCode);
+ }
+
+ @Nullable
+ public byte[] optimize() {
+ throwIfClosed();
+ return nativeOptimize(this);
+ }
+
+ @Nullable
+ public byte[] getOptimizeInfo() {
+ throwIfClosed();
+ return nativeGetOptimizeInfo(this);
+ }
+
+ @Nullable
+ public byte[] getStorageInfo() {
+ throwIfClosed();
+ return nativeGetStorageInfo(this);
+ }
+
+ @Nullable
+ public byte[] getDebugInfo(int verbosityCode) {
+ throwIfClosed();
+ return nativeGetDebugInfo(this, verbosityCode);
+ }
+
+ @Nullable
+ public byte[] reset() {
+ throwIfClosed();
+ return nativeReset(this);
+ }
+
+ public static boolean shouldLog(short severity) {
+ return shouldLog(severity, (short) 0);
+ }
+
+ public static boolean shouldLog(short severity, short verbosity) {
+ return nativeShouldLog(severity, verbosity);
+ }
+
+ public static boolean setLoggingLevel(short severity) {
+ return setLoggingLevel(severity, (short) 0);
+ }
+
+ public static boolean setLoggingLevel(short severity, short verbosity) {
+ return nativeSetLoggingLevel(severity, verbosity);
+ }
+
+ @Nullable
+ public static String getLoggingTag() {
+ String tag = nativeGetLoggingTag();
+ if (tag == null) {
+ Log.e(TAG, "Received null logging tag from native.");
+ }
+ return tag;
+ }
+
+ private static native long nativeCreate(byte[] icingSearchEngineOptionsBytes);
+
+ private static native void nativeDestroy(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeInitialize(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeSetSchema(
+ IcingSearchEngineImpl instance, byte[] schemaBytes, boolean ignoreErrorsAndDeleteDocuments);
+
+ private static native byte[] nativeGetSchema(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeGetSchemaType(
+ IcingSearchEngineImpl instance, String schemaType);
+
+ private static native byte[] nativePut(IcingSearchEngineImpl instance, byte[] documentBytes);
+
+ private static native byte[] nativeGet(
+ IcingSearchEngineImpl instance, String namespace, String uri, byte[] getResultSpecBytes);
+
+ private static native byte[] nativeReportUsage(
+ IcingSearchEngineImpl instance, byte[] usageReportBytes);
+
+ private static native byte[] nativeGetAllNamespaces(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeSearch(
+ IcingSearchEngineImpl instance,
+ byte[] searchSpecBytes,
+ byte[] scoringSpecBytes,
+ byte[] resultSpecBytes,
+ long javaToNativeStartTimestampMs);
+
+ private static native byte[] nativeGetNextPage(
+ IcingSearchEngineImpl instance, long nextPageToken, long javaToNativeStartTimestampMs);
+
+ private static native void nativeInvalidateNextPageToken(
+ IcingSearchEngineImpl instance, long nextPageToken);
+
+ private static native byte[] nativeDelete(
+ IcingSearchEngineImpl instance, String namespace, String uri);
+
+ private static native byte[] nativeDeleteByNamespace(
+ IcingSearchEngineImpl instance, String namespace);
+
+ private static native byte[] nativeDeleteBySchemaType(
+ IcingSearchEngineImpl instance, String schemaType);
+
+ private static native byte[] nativeDeleteByQuery(
+ IcingSearchEngineImpl instance, byte[] searchSpecBytes, boolean returnDeletedDocumentInfo);
+
+ private static native byte[] nativePersistToDisk(IcingSearchEngineImpl instance, int persistType);
+
+ private static native byte[] nativeOptimize(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeGetOptimizeInfo(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeGetStorageInfo(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeReset(IcingSearchEngineImpl instance);
+
+ private static native byte[] nativeSearchSuggestions(
+ IcingSearchEngineImpl instance, byte[] suggestionSpecBytes);
+
+ private static native byte[] nativeGetDebugInfo(IcingSearchEngineImpl instance, int verbosity);
+
+ private static native boolean nativeShouldLog(short severity, short verbosity);
+
+ private static native boolean nativeSetLoggingLevel(short severity, short verbosity);
+
+ private static native String nativeGetLoggingTag();
+}
diff --git a/java/src/com/google/android/icing/IcingSearchEngineInterface.java b/java/src/com/google/android/icing/IcingSearchEngineInterface.java
new file mode 100644
index 0000000..0bc58f1
--- /dev/null
+++ b/java/src/com/google/android/icing/IcingSearchEngineInterface.java
@@ -0,0 +1,148 @@
+package com.google.android.icing;
+
+import com.google.android.icing.proto.DebugInfoResultProto;
+import com.google.android.icing.proto.DebugInfoVerbosity;
+import com.google.android.icing.proto.DeleteByNamespaceResultProto;
+import com.google.android.icing.proto.DeleteByQueryResultProto;
+import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
+import com.google.android.icing.proto.DeleteResultProto;
+import com.google.android.icing.proto.DocumentProto;
+import com.google.android.icing.proto.GetAllNamespacesResultProto;
+import com.google.android.icing.proto.GetOptimizeInfoResultProto;
+import com.google.android.icing.proto.GetResultProto;
+import com.google.android.icing.proto.GetResultSpecProto;
+import com.google.android.icing.proto.GetSchemaResultProto;
+import com.google.android.icing.proto.GetSchemaTypeResultProto;
+import com.google.android.icing.proto.InitializeResultProto;
+import com.google.android.icing.proto.OptimizeResultProto;
+import com.google.android.icing.proto.PersistToDiskResultProto;
+import com.google.android.icing.proto.PersistType;
+import com.google.android.icing.proto.PutResultProto;
+import com.google.android.icing.proto.ReportUsageResultProto;
+import com.google.android.icing.proto.ResetResultProto;
+import com.google.android.icing.proto.ResultSpecProto;
+import com.google.android.icing.proto.SchemaProto;
+import com.google.android.icing.proto.ScoringSpecProto;
+import com.google.android.icing.proto.SearchResultProto;
+import com.google.android.icing.proto.SearchSpecProto;
+import com.google.android.icing.proto.SetSchemaResultProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
+import com.google.android.icing.proto.SuggestionResponse;
+import com.google.android.icing.proto.SuggestionSpecProto;
+import com.google.android.icing.proto.UsageReport;
+import java.io.Closeable;
+
+/** A common user-facing interface to expose the functionality provided by the Icing Library. */
+public interface IcingSearchEngineInterface extends Closeable {
+ /**
+ * Initializes the current IcingSearchEngine implementation.
+ *
+ * <p>Internally the icing instance will be initialized.
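+   *
+   * <p>A typical call sequence against this interface, sketched with placeholder arguments:
+   *
+   * <pre>{@code
+   * engine.initialize();
+   * engine.setSchema(schema);
+   * engine.put(document);
+   * SearchResultProto results = engine.search(searchSpec, scoringSpec, resultSpec);
+   * engine.close();
+   * }</pre>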
+ */
+ InitializeResultProto initialize();
+
+ /** Sets the schema for the icing instance. */
+ SetSchemaResultProto setSchema(SchemaProto schema);
+
+ /**
+ * Sets the schema for the icing instance.
+ *
+   * @param ignoreErrorsAndDeleteDocuments if true, forces the schema to be set even when it is
+   *     incompatible, deleting any documents that the change invalidates.
+ */
+ SetSchemaResultProto setSchema(SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments);
+
+ /** Gets the schema for the icing instance. */
+ GetSchemaResultProto getSchema();
+
+ /**
+ * Gets the schema for the icing instance.
+ *
+ * @param schemaType type of the schema.
+ */
+ GetSchemaTypeResultProto getSchemaType(String schemaType);
+
+ /** Puts the document. */
+ PutResultProto put(DocumentProto document);
+
+ /**
+ * Gets the document.
+ *
+ * @param namespace namespace of the document.
+ * @param uri uri of the document.
+ * @param getResultSpec the spec for getting the document.
+ */
+ GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);
+
+ /** Reports usage. */
+ ReportUsageResultProto reportUsage(UsageReport usageReport);
+
+ /** Gets all namespaces. */
+ GetAllNamespacesResultProto getAllNamespaces();
+
+ /**
+ * Searches over the documents.
+ *
+   * <p>Further pages of documents are retrieved through subsequent {@link #getNextPage} calls,
+   * using the next-page token carried in the returned {@link SearchResultProto}.
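+   *
+   * <p>A pagination sketch ({@code engine}, {@code searchSpec}, {@code scoringSpec} and
+   * {@code resultSpec} are placeholders):
+   *
+   * <pre>{@code
+   * SearchResultProto page = engine.search(searchSpec, scoringSpec, resultSpec);
+   * while (page.getResultsCount() > 0) {
+   *   // ... consume page.getResultsList() ...
+   *   page = engine.getNextPage(page.getNextPageToken());
+   * }
+   * }</pre>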
+ */
+ SearchResultProto search(
+ SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);
+
+ /** Gets the next page. */
+ SearchResultProto getNextPage(long nextPageToken);
+
+ /** Invalidates the next page token. */
+ void invalidateNextPageToken(long nextPageToken);
+
+ /**
+ * Deletes the document.
+ *
+   * @param namespace the namespace the document to be deleted belongs to.
+ * @param uri the uri for the document to be deleted.
+ */
+ DeleteResultProto delete(String namespace, String uri);
+
+ /** Returns the suggestions for the search query. */
+ SuggestionResponse searchSuggestions(SuggestionSpecProto suggestionSpec);
+
+ /** Deletes documents by the namespace. */
+ DeleteByNamespaceResultProto deleteByNamespace(String namespace);
+
+ /** Deletes documents by the schema type. */
+ DeleteBySchemaTypeResultProto deleteBySchemaType(String schemaType);
+
+ /** Deletes documents by the search query. */
+ DeleteByQueryResultProto deleteByQuery(SearchSpecProto searchSpec);
+
+ /**
+   * Deletes documents by the search query.
+ *
+ * @param returnDeletedDocumentInfo whether additional information about deleted documents will be
+ * included in {@link DeleteByQueryResultProto}.
+ */
+ DeleteByQueryResultProto deleteByQuery(
+ SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo);
+
+  /** Makes sure every update/delete received up to this point is flushed to disk. */
+ PersistToDiskResultProto persistToDisk(PersistType.Code persistTypeCode);
+
+ /** Makes the icing instance run tasks that are too expensive to be run in real-time. */
+ OptimizeResultProto optimize();
+
+ /** Gets information about the optimization. */
+ GetOptimizeInfoResultProto getOptimizeInfo();
+
+ /** Gets information about the storage. */
+ StorageInfoResultProto getStorageInfo();
+
+ /** Gets the debug information for the current icing instance. */
+ DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity);
+
+ /** Clears all data from the current icing instance, and reinitializes it. */
+ ResetResultProto reset();
+
+ /** Closes the current icing instance. */
+ @Override
+ void close();
+}
diff --git a/java/src/com/google/android/icing/IcingSearchEngineUtils.java b/java/src/com/google/android/icing/IcingSearchEngineUtils.java
new file mode 100644
index 0000000..0913216
--- /dev/null
+++ b/java/src/com/google/android/icing/IcingSearchEngineUtils.java
@@ -0,0 +1,471 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.android.icing;
+
+import android.util.Log;
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import com.google.android.icing.proto.DebugInfoResultProto;
+import com.google.android.icing.proto.DeleteByNamespaceResultProto;
+import com.google.android.icing.proto.DeleteByQueryResultProto;
+import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
+import com.google.android.icing.proto.DeleteResultProto;
+import com.google.android.icing.proto.GetAllNamespacesResultProto;
+import com.google.android.icing.proto.GetOptimizeInfoResultProto;
+import com.google.android.icing.proto.GetResultProto;
+import com.google.android.icing.proto.GetSchemaResultProto;
+import com.google.android.icing.proto.GetSchemaTypeResultProto;
+import com.google.android.icing.proto.InitializeResultProto;
+import com.google.android.icing.proto.OptimizeResultProto;
+import com.google.android.icing.proto.PersistToDiskResultProto;
+import com.google.android.icing.proto.PutResultProto;
+import com.google.android.icing.proto.ReportUsageResultProto;
+import com.google.android.icing.proto.ResetResultProto;
+import com.google.android.icing.proto.SearchResultProto;
+import com.google.android.icing.proto.SetSchemaResultProto;
+import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
+import com.google.android.icing.proto.SuggestionResponse;
+import com.google.protobuf.ExtensionRegistryLite;
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
+ * Contains utility methods for IcingSearchEngine to convert byte arrays to the corresponding
+ * protos.
+ *
+ * <p>It is also used by the AppSearch dynamite 0p client APIs to convert byte arrays to the
+ * protos.
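+ *
+ * <p>A usage sketch; every converter returns a proto whose status code is INTERNAL when handed
+ * null or unparseable bytes:
+ *
+ * <pre>{@code
+ * byte[] bytes = icingSearchEngineImpl.initialize();
+ * InitializeResultProto result = IcingSearchEngineUtils.byteArrayToInitializeResultProto(bytes);
+ * }</pre>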
+ */
+public final class IcingSearchEngineUtils {
+ private static final String TAG = "IcingSearchEngineUtils";
+ private static final ExtensionRegistryLite EXTENSION_REGISTRY_LITE =
+ ExtensionRegistryLite.getEmptyRegistry();
+
+ private IcingSearchEngineUtils() {}
+
+ // TODO(b/240333360) Check to see if we can use one template function to replace those
+ @NonNull
+ public static InitializeResultProto byteArrayToInitializeResultProto(
+ @Nullable byte[] initializeResultBytes) {
+ if (initializeResultBytes == null) {
+ Log.e(TAG, "Received null InitializeResult from native.");
+ return InitializeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return InitializeResultProto.parseFrom(initializeResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing InitializeResultProto.", e);
+ return InitializeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static SetSchemaResultProto byteArrayToSetSchemaResultProto(
+ @Nullable byte[] setSchemaResultBytes) {
+ if (setSchemaResultBytes == null) {
+ Log.e(TAG, "Received null SetSchemaResultProto from native.");
+ return SetSchemaResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return SetSchemaResultProto.parseFrom(setSchemaResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing SetSchemaResultProto.", e);
+ return SetSchemaResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static GetSchemaResultProto byteArrayToGetSchemaResultProto(
+ @Nullable byte[] getSchemaResultBytes) {
+ if (getSchemaResultBytes == null) {
+ Log.e(TAG, "Received null GetSchemaResultProto from native.");
+ return GetSchemaResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return GetSchemaResultProto.parseFrom(getSchemaResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetSchemaResultProto.", e);
+ return GetSchemaResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static GetSchemaTypeResultProto byteArrayToGetSchemaTypeResultProto(
+ @Nullable byte[] getSchemaTypeResultBytes) {
+ if (getSchemaTypeResultBytes == null) {
+ Log.e(TAG, "Received null GetSchemaTypeResultProto from native.");
+ return GetSchemaTypeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return GetSchemaTypeResultProto.parseFrom(getSchemaTypeResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetSchemaTypeResultProto.", e);
+ return GetSchemaTypeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static PutResultProto byteArrayToPutResultProto(@Nullable byte[] putResultBytes) {
+ if (putResultBytes == null) {
+ Log.e(TAG, "Received null PutResultProto from native.");
+ return PutResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return PutResultProto.parseFrom(putResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing PutResultProto.", e);
+ return PutResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static GetResultProto byteArrayToGetResultProto(@Nullable byte[] getResultBytes) {
+ if (getResultBytes == null) {
+ Log.e(TAG, "Received null GetResultProto from native.");
+ return GetResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return GetResultProto.parseFrom(getResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetResultProto.", e);
+ return GetResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static ReportUsageResultProto byteArrayToReportUsageResultProto(
+ @Nullable byte[] reportUsageResultBytes) {
+ if (reportUsageResultBytes == null) {
+ Log.e(TAG, "Received null ReportUsageResultProto from native.");
+ return ReportUsageResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return ReportUsageResultProto.parseFrom(reportUsageResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing ReportUsageResultProto.", e);
+ return ReportUsageResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static GetAllNamespacesResultProto byteArrayToGetAllNamespacesResultProto(
+ @Nullable byte[] getAllNamespacesResultBytes) {
+ if (getAllNamespacesResultBytes == null) {
+ Log.e(TAG, "Received null GetAllNamespacesResultProto from native.");
+ return GetAllNamespacesResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return GetAllNamespacesResultProto.parseFrom(
+ getAllNamespacesResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetAllNamespacesResultProto.", e);
+ return GetAllNamespacesResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static SearchResultProto byteArrayToSearchResultProto(@Nullable byte[] searchResultBytes) {
+ if (searchResultBytes == null) {
+ Log.e(TAG, "Received null SearchResultProto from native.");
+ return SearchResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ SearchResultProto.Builder searchResultProtoBuilder =
+ SearchResultProto.newBuilder().mergeFrom(searchResultBytes, EXTENSION_REGISTRY_LITE);
+ setNativeToJavaJniLatency(searchResultProtoBuilder);
+ return searchResultProtoBuilder.build();
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing SearchResultProto.", e);
+ return SearchResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ private static void setNativeToJavaJniLatency(
+ SearchResultProto.Builder searchResultProtoBuilder) {
+ int nativeToJavaLatencyMs =
+ (int)
+ (System.currentTimeMillis()
+ - searchResultProtoBuilder.getQueryStats().getNativeToJavaStartTimestampMs());
+ searchResultProtoBuilder.setQueryStats(
+ searchResultProtoBuilder.getQueryStats().toBuilder()
+ .setNativeToJavaJniLatencyMs(nativeToJavaLatencyMs));
+ }
+
+ @NonNull
+ public static DeleteResultProto byteArrayToDeleteResultProto(@Nullable byte[] deleteResultBytes) {
+ if (deleteResultBytes == null) {
+ Log.e(TAG, "Received null DeleteResultProto from native.");
+ return DeleteResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return DeleteResultProto.parseFrom(deleteResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing DeleteResultProto.", e);
+ return DeleteResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static SuggestionResponse byteArrayToSuggestionResponse(
+ @Nullable byte[] suggestionResponseBytes) {
+ if (suggestionResponseBytes == null) {
+ Log.e(TAG, "Received null suggestionResponseBytes from native.");
+ return SuggestionResponse.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return SuggestionResponse.parseFrom(suggestionResponseBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing suggestionResponseBytes.", e);
+ return SuggestionResponse.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static DeleteByNamespaceResultProto byteArrayToDeleteByNamespaceResultProto(
+ @Nullable byte[] deleteByNamespaceResultBytes) {
+ if (deleteByNamespaceResultBytes == null) {
+ Log.e(TAG, "Received null DeleteByNamespaceResultProto from native.");
+ return DeleteByNamespaceResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return DeleteByNamespaceResultProto.parseFrom(
+ deleteByNamespaceResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing DeleteByNamespaceResultProto.", e);
+ return DeleteByNamespaceResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static DeleteBySchemaTypeResultProto byteArrayToDeleteBySchemaTypeResultProto(
+ @Nullable byte[] deleteBySchemaTypeResultBytes) {
+ if (deleteBySchemaTypeResultBytes == null) {
+ Log.e(TAG, "Received null DeleteBySchemaTypeResultProto from native.");
+ return DeleteBySchemaTypeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return DeleteBySchemaTypeResultProto.parseFrom(
+ deleteBySchemaTypeResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing DeleteBySchemaTypeResultProto.", e);
+ return DeleteBySchemaTypeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static DeleteByQueryResultProto byteArrayToDeleteByQueryResultProto(
+ @Nullable byte[] deleteResultBytes) {
+ if (deleteResultBytes == null) {
+ Log.e(TAG, "Received null DeleteResultProto from native.");
+ return DeleteByQueryResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return DeleteByQueryResultProto.parseFrom(deleteResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing DeleteResultProto.", e);
+ return DeleteByQueryResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static PersistToDiskResultProto byteArrayToPersistToDiskResultProto(
+ @Nullable byte[] persistToDiskResultBytes) {
+ if (persistToDiskResultBytes == null) {
+ Log.e(TAG, "Received null PersistToDiskResultProto from native.");
+ return PersistToDiskResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return PersistToDiskResultProto.parseFrom(persistToDiskResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing PersistToDiskResultProto.", e);
+ return PersistToDiskResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static OptimizeResultProto byteArrayToOptimizeResultProto(
+ @Nullable byte[] optimizeResultBytes) {
+ if (optimizeResultBytes == null) {
+ Log.e(TAG, "Received null OptimizeResultProto from native.");
+ return OptimizeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return OptimizeResultProto.parseFrom(optimizeResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing OptimizeResultProto.", e);
+ return OptimizeResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static GetOptimizeInfoResultProto byteArrayToGetOptimizeInfoResultProto(
+ @Nullable byte[] getOptimizeInfoResultBytes) {
+ if (getOptimizeInfoResultBytes == null) {
+ Log.e(TAG, "Received null GetOptimizeInfoResultProto from native.");
+ return GetOptimizeInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return GetOptimizeInfoResultProto.parseFrom(
+ getOptimizeInfoResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e);
+ return GetOptimizeInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static StorageInfoResultProto byteArrayToStorageInfoResultProto(
+ @Nullable byte[] storageInfoResultProtoBytes) {
+ if (storageInfoResultProtoBytes == null) {
+ Log.e(TAG, "Received null StorageInfoResultProto from native.");
+ return StorageInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return StorageInfoResultProto.parseFrom(storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e);
+ return StorageInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static DebugInfoResultProto byteArrayToDebugInfoResultProto(
+ @Nullable byte[] debugInfoResultProtoBytes) {
+ if (debugInfoResultProtoBytes == null) {
+ Log.e(TAG, "Received null DebugInfoResultProto from native.");
+ return DebugInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return DebugInfoResultProto.parseFrom(debugInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing DebugInfoResultProto.", e);
+ return DebugInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
+ public static ResetResultProto byteArrayToResetResultProto(@Nullable byte[] resetResultBytes) {
+ if (resetResultBytes == null) {
+ Log.e(TAG, "Received null ResetResultProto from native.");
+ return ResetResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return ResetResultProto.parseFrom(resetResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing ResetResultProto.", e);
+ return ResetResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+}
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 01a6050..1ed2d9a 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -15,25 +15,32 @@
package com.google.android.icing;
import static com.google.common.truth.Truth.assertThat;
+import static com.google.common.truth.Truth.assertWithMessage;
-import androidx.test.core.app.ApplicationProvider;
+import com.google.android.icing.IcingSearchEngine;
+import com.google.android.icing.proto.DebugInfoResultProto;
+import com.google.android.icing.proto.DebugInfoVerbosity;
import com.google.android.icing.proto.DeleteByNamespaceResultProto;
+import com.google.android.icing.proto.DeleteByQueryResultProto;
import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
import com.google.android.icing.proto.DeleteResultProto;
import com.google.android.icing.proto.DocumentProto;
+import com.google.android.icing.proto.GetAllNamespacesResultProto;
import com.google.android.icing.proto.GetOptimizeInfoResultProto;
import com.google.android.icing.proto.GetResultProto;
+import com.google.android.icing.proto.GetResultSpecProto;
import com.google.android.icing.proto.GetSchemaResultProto;
import com.google.android.icing.proto.GetSchemaTypeResultProto;
import com.google.android.icing.proto.IcingSearchEngineOptions;
-import com.google.android.icing.proto.IndexingConfig;
-import com.google.android.icing.proto.IndexingConfig.TokenizerType;
import com.google.android.icing.proto.InitializeResultProto;
+import com.google.android.icing.proto.LogSeverity;
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
+import com.google.android.icing.proto.PersistType;
import com.google.android.icing.proto.PropertyConfigProto;
import com.google.android.icing.proto.PropertyProto;
import com.google.android.icing.proto.PutResultProto;
+import com.google.android.icing.proto.ReportUsageResultProto;
import com.google.android.icing.proto.ResetResultProto;
import com.google.android.icing.proto.ResultSpecProto;
import com.google.android.icing.proto.SchemaProto;
@@ -42,11 +49,26 @@ import com.google.android.icing.proto.ScoringSpecProto;
import com.google.android.icing.proto.SearchResultProto;
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
+import com.google.android.icing.proto.SnippetMatchProto;
+import com.google.android.icing.proto.SnippetProto;
import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
+import com.google.android.icing.proto.StringIndexingConfig;
+import com.google.android.icing.proto.StringIndexingConfig.TokenizerType;
+import com.google.android.icing.proto.SuggestionResponse;
+import com.google.android.icing.proto.SuggestionScoringSpecProto;
+import com.google.android.icing.proto.SuggestionSpecProto;
import com.google.android.icing.proto.TermMatchType;
-import com.google.android.icing.IcingSearchEngine;
+import com.google.android.icing.proto.TermMatchType.Code;
+import com.google.android.icing.proto.UsageReport;
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+import org.junit.After;
import org.junit.Before;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -56,10 +78,13 @@ import org.junit.runners.JUnit4;
*/
@RunWith(JUnit4.class)
public final class IcingSearchEngineTest {
+ @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder();
private static final String EMAIL_TYPE = "Email";
- private String filesDir;
+ private File tempDir;
+
+ private IcingSearchEngine icingSearchEngine;
private static SchemaTypeConfigProto createEmailTypeConfig() {
return SchemaTypeConfigProto.newBuilder()
@@ -69,8 +94,8 @@ public final class IcingSearchEngineTest {
.setPropertyName("subject")
.setDataType(PropertyConfigProto.DataType.Code.STRING)
.setCardinality(PropertyConfigProto.Cardinality.Code.OPTIONAL)
- .setIndexingConfig(
- IndexingConfig.newBuilder()
+ .setStringIndexingConfig(
+ StringIndexingConfig.newBuilder()
.setTokenizerType(TokenizerType.Code.PLAIN)
.setTermMatchType(TermMatchType.Code.PREFIX)))
.addProperties(
@@ -78,8 +103,8 @@ public final class IcingSearchEngineTest {
.setPropertyName("body")
.setDataType(PropertyConfigProto.DataType.Code.STRING)
.setCardinality(PropertyConfigProto.Cardinality.Code.OPTIONAL)
- .setIndexingConfig(
- IndexingConfig.newBuilder()
+ .setStringIndexingConfig(
+ StringIndexingConfig.newBuilder()
.setTokenizerType(TokenizerType.Code.PLAIN)
.setTermMatchType(TermMatchType.Code.PREFIX)))
.build();
@@ -96,78 +121,74 @@ public final class IcingSearchEngineTest {
@Before
public void setUp() throws Exception {
- filesDir = ApplicationProvider.getApplicationContext().getFilesDir().getCanonicalPath();
+ tempDir = temporaryFolder.newFolder();
+ IcingSearchEngineOptions options =
+ IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
+ icingSearchEngine = new IcingSearchEngine(options);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ icingSearchEngine.close();
}
@Test
public void testInitialize() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
-
- InitializeResultProto initializeResultProto = icing.initialize();
- assertThat(initializeResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ InitializeResultProto initializeResultProto = icingSearchEngine.initialize();
+ assertStatusOk(initializeResultProto.getStatus());
}
@Test
public void testSetAndGetSchema() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
SetSchemaResultProto setSchemaResultProto =
- icing.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false);
- assertThat(setSchemaResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false);
+ assertStatusOk(setSchemaResultProto.getStatus());
- GetSchemaResultProto getSchemaResultProto = icing.getSchema();
- assertThat(getSchemaResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ GetSchemaResultProto getSchemaResultProto = icingSearchEngine.getSchema();
+ assertStatusOk(getSchemaResultProto.getStatus());
assertThat(getSchemaResultProto.getSchema()).isEqualTo(schema);
GetSchemaTypeResultProto getSchemaTypeResultProto =
- icing.getSchemaType(emailTypeConfig.getSchemaType());
- assertThat(getSchemaTypeResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ icingSearchEngine.getSchemaType(emailTypeConfig.getSchemaType());
+ assertStatusOk(getSchemaTypeResultProto.getStatus());
assertThat(getSchemaTypeResultProto.getSchemaTypeConfig()).isEqualTo(emailTypeConfig);
}
@Test
public void testPutAndGetDocuments() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- PutResultProto putResultProto = icing.put(emailDocument);
- assertThat(putResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ PutResultProto putResultProto = icingSearchEngine.put(emailDocument);
+ assertStatusOk(putResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
- assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
+ assertStatusOk(getResultProto.getStatus());
assertThat(getResultProto.getDocument()).isEqualTo(emailDocument);
}
@Test
public void testSearch() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
@@ -177,7 +198,7 @@ public final class IcingSearchEngineTest {
createEmailDocument("namespace", "uri").toBuilder()
.addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
.build();
- assertThat(icing.put(emailDocument).getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
SearchSpecProto searchSpec =
SearchSpecProto.newBuilder()
@@ -186,138 +207,595 @@ public final class IcingSearchEngineTest {
.build();
SearchResultProto searchResultProto =
- icing.search(
+ icingSearchEngine.search(
searchSpec,
ScoringSpecProto.getDefaultInstance(),
ResultSpecProto.getDefaultInstance());
- assertThat(searchResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(searchResultProto.getStatus());
assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
assertThat(searchResultProto.getResults(0).getDocument()).isEqualTo(emailDocument);
+
+ assertThat(searchResultProto.getQueryStats().hasNativeToJavaStartTimestampMs()).isTrue();
+ assertThat(searchResultProto.getQueryStats().hasNativeToJavaJniLatencyMs()).isTrue();
+ assertThat(searchResultProto.getQueryStats().hasJavaToNativeJniLatencyMs()).isTrue();
+ assertThat(searchResultProto.getQueryStats().getNativeToJavaStartTimestampMs())
+ .isGreaterThan(0);
+ assertThat(searchResultProto.getQueryStats().getNativeToJavaJniLatencyMs()).isAtLeast(0);
+ assertThat(searchResultProto.getQueryStats().getJavaToNativeJniLatencyMs()).isAtLeast(0);
+ }
+
+ @Test
+ public void testGetNextPage() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ Map<String, DocumentProto> documents = new HashMap<>();
+ for (int i = 0; i < 10; i++) {
+ DocumentProto emailDocument =
+ createEmailDocument("namespace", "uri:" + i).toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
+ .build();
+ documents.put("uri:" + i, emailDocument);
+      PutResultProto putResultProto = icingSearchEngine.put(emailDocument);
+      assertWithMessage(putResultProto.getStatus().getMessage())
+          .that(putResultProto.getStatus().getCode())
+          .isEqualTo(StatusProto.Code.OK);
+ }
+
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("foo")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+ ResultSpecProto resultSpecProto = ResultSpecProto.newBuilder().setNumPerPage(1).build();
+
+ SearchResultProto searchResultProto =
+ icingSearchEngine.search(
+ searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument();
+ assertThat(resultDocument).isEqualTo(documents.remove(resultDocument.getUri()));
+
+ assertThat(searchResultProto.getQueryStats().hasNativeToJavaStartTimestampMs()).isTrue();
+ assertThat(searchResultProto.getQueryStats().hasNativeToJavaJniLatencyMs()).isTrue();
+ assertThat(searchResultProto.getQueryStats().hasJavaToNativeJniLatencyMs()).isTrue();
+ assertThat(searchResultProto.getQueryStats().getNativeToJavaStartTimestampMs())
+ .isGreaterThan(0);
+ assertThat(searchResultProto.getQueryStats().getNativeToJavaJniLatencyMs()).isAtLeast(0);
+ assertThat(searchResultProto.getQueryStats().getJavaToNativeJniLatencyMs()).isAtLeast(0);
+
+    // Fetch the remaining pages.
+ for (int i = 1; i < 5; i++) {
+ searchResultProto = icingSearchEngine.getNextPage(searchResultProto.getNextPageToken());
+ assertWithMessage(searchResultProto.getStatus().getMessage())
+ .that(searchResultProto.getStatus().getCode())
+ .isEqualTo(StatusProto.Code.OK);
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+ resultDocument = searchResultProto.getResults(0).getDocument();
+ assertThat(resultDocument).isEqualTo(documents.remove(resultDocument.getUri()));
+ }
+
+    // Invalidate the remaining results.
+ icingSearchEngine.invalidateNextPageToken(searchResultProto.getNextPageToken());
+
+ searchResultProto = icingSearchEngine.getNextPage(searchResultProto.getNextPageToken());
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(0);
}
@Test
public void testDelete() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertThat(icing.put(emailDocument).getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
- DeleteResultProto deleteResultProto = icing.delete("namespace", "uri");
- assertThat(deleteResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ DeleteResultProto deleteResultProto = icingSearchEngine.delete("namespace", "uri");
+ assertStatusOk(deleteResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
}
@Test
public void testDeleteByNamespace() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertThat(icing.put(emailDocument).getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
DeleteByNamespaceResultProto deleteByNamespaceResultProto =
- icing.deleteByNamespace("namespace");
- assertThat(deleteByNamespaceResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ icingSearchEngine.deleteByNamespace("namespace");
+ assertStatusOk(deleteByNamespaceResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
}
@Test
public void testDeleteBySchemaType() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertThat(icing.put(emailDocument).getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
DeleteBySchemaTypeResultProto deleteBySchemaTypeResultProto =
- icing.deleteBySchemaType(EMAIL_TYPE);
- assertThat(deleteBySchemaTypeResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ icingSearchEngine.deleteBySchemaType(EMAIL_TYPE);
+ assertStatusOk(deleteBySchemaTypeResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
}
@Test
+ public void testDeleteByQuery() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+ DocumentProto emailDocument2 =
+ createEmailDocument("namespace", "uri2").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus());
+
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("foo")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+
+ SearchResultProto searchResultProto =
+ icingSearchEngine.search(
+ searchSpec,
+ ScoringSpecProto.getDefaultInstance(),
+ ResultSpecProto.getDefaultInstance());
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+ assertThat(searchResultProto.getResults(0).getDocument()).isEqualTo(emailDocument1);
+
+ DeleteByQueryResultProto deleteResultProto = icingSearchEngine.deleteByQuery(searchSpec);
+ assertStatusOk(deleteResultProto.getStatus());
+ // By default, the deleteByQuery API does not return a summary of the deleted documents unless
+ // the returnDeletedDocumentInfo parameter is set to true.
+ assertThat(deleteResultProto.getDeletedDocumentsList()).isEmpty();
+
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance());
+ assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
+ getResultProto =
+ icingSearchEngine.get("namespace", "uri2", GetResultSpecProto.getDefaultInstance());
+ assertStatusOk(getResultProto.getStatus());
+ }
+
+ @Test
+ public void testDeleteByQueryWithDeletedDocumentInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+ DocumentProto emailDocument2 =
+ createEmailDocument("namespace", "uri2").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus());
+
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("foo")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+
+ DeleteByQueryResultProto deleteResultProto =
+ icingSearchEngine.deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ true);
+ assertStatusOk(deleteResultProto.getStatus());
+ DeleteByQueryResultProto.DocumentGroupInfo info =
+ DeleteByQueryResultProto.DocumentGroupInfo.newBuilder()
+ .setNamespace("namespace")
+ .setSchema("Email")
+ .addUris("uri1")
+ .build();
+ assertThat(deleteResultProto.getDeletedDocumentsList()).containsExactly(info);
+
+ GetResultProto getResultProto =
+ icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance());
+ assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
+ getResultProto =
+ icingSearchEngine.get("namespace", "uri2", GetResultSpecProto.getDefaultInstance());
+ assertStatusOk(getResultProto.getStatus());
+ }
+
+ @Test
public void testPersistToDisk() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- PersistToDiskResultProto persistToDiskResultProto = icing.persistToDisk();
- assertThat(persistToDiskResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ PersistToDiskResultProto persistToDiskResultProto =
+ icingSearchEngine.persistToDisk(PersistType.Code.LITE);
+ assertStatusOk(persistToDiskResultProto.getStatus());
}
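
Note: the test above exercises the cheaper LITE persist. As a hedged sketch of the other mode — assuming the FULL code that persist.proto defines alongside LITE, which is not shown in this diff — a caller about to shut down might flush everything durably instead:

  // Hedged sketch: PersistType.Code.FULL is assumed from persist.proto; it
  // trades higher latency for a full durable flush, e.g. before shutdown.
  PersistToDiskResultProto fullPersist =
      icingSearchEngine.persistToDisk(PersistType.Code.FULL);
  assertStatusOk(fullPersist.getStatus());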
@Test
public void testOptimize() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- OptimizeResultProto optimizeResultProto = icing.optimize();
- assertThat(optimizeResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ OptimizeResultProto optimizeResultProto = icingSearchEngine.optimize();
+ assertStatusOk(optimizeResultProto.getStatus());
}
@Test
public void testGetOptimizeInfo() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- GetOptimizeInfoResultProto getOptimizeInfoResultProto = icing.getOptimizeInfo();
- assertThat(getOptimizeInfoResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ GetOptimizeInfoResultProto getOptimizeInfoResultProto = icingSearchEngine.getOptimizeInfo();
+ assertStatusOk(getOptimizeInfoResultProto.getStatus());
assertThat(getOptimizeInfoResultProto.getOptimizableDocs()).isEqualTo(0);
assertThat(getOptimizeInfoResultProto.getEstimatedOptimizableBytes()).isEqualTo(0);
}
@Test
+ public void testGetStorageInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ StorageInfoResultProto storageInfoResultProto = icingSearchEngine.getStorageInfo();
+ assertStatusOk(storageInfoResultProto.getStatus());
+ }
+
+ @Test
+ public void testGetDebugInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument = createEmailDocument("namespace", "uri");
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
+
+ DebugInfoResultProto debugInfoResultProtoBasic =
+ icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.BASIC);
+ assertStatusOk(debugInfoResultProtoBasic.getStatus());
+ assertThat(debugInfoResultProtoBasic.getDebugInfo().getDocumentInfo().getCorpusInfoList())
+ .isEmpty(); // because verbosity=BASIC
+
+ DebugInfoResultProto debugInfoResultProtoDetailed =
+ icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.DETAILED);
+ assertStatusOk(debugInfoResultProtoDetailed.getStatus());
+ assertThat(debugInfoResultProtoDetailed.getDebugInfo().getDocumentInfo().getCorpusInfoList())
+ .hasSize(1); // because verbosity=DETAILED
+ }
+
+ @Test
+ public void testGetAllNamespaces() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument = createEmailDocument("namespace", "uri");
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
+
+ GetAllNamespacesResultProto getAllNamespacesResultProto = icingSearchEngine.getAllNamespaces();
+ assertStatusOk(getAllNamespacesResultProto.getStatus());
+ assertThat(getAllNamespacesResultProto.getNamespacesList()).containsExactly("namespace");
+ }
+
+ @Test
public void testReset() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(filesDir).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertThat(icing.initialize().getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ ResetResultProto resetResultProto = icingSearchEngine.reset();
+ assertStatusOk(resetResultProto.getStatus());
+ }
+
+ @Test
+ public void testReportUsage() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ // Set schema and put a document.
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument = createEmailDocument("namespace", "uri");
+ PutResultProto putResultProto = icingSearchEngine.put(emailDocument);
+ assertStatusOk(putResultProto.getStatus());
+
+ // Report usage
+ UsageReport usageReport =
+ UsageReport.newBuilder()
+ .setDocumentNamespace("namespace")
+ .setDocumentUri("uri")
+ .setUsageTimestampMs(1)
+ .setUsageType(UsageReport.UsageType.USAGE_TYPE1)
+ .build();
+ ReportUsageResultProto reportUsageResultProto = icingSearchEngine.reportUsage(usageReport);
+ assertStatusOk(reportUsageResultProto.getStatus());
+ }
+
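Note: reported usage only pays off at query time. A minimal follow-on sketch, assuming the USAGE_TYPE1_COUNT ranking strategy from scoring.proto (not part of this diff) and standard protoc-generated setter names, would rank results by how often they were reported used:

  // Hedged sketch: ScoringSpecProto.RankingStrategy.Code.USAGE_TYPE1_COUNT is
  // assumed from scoring.proto; the empty query is illustrative only.
  ScoringSpecProto scoringSpec =
      ScoringSpecProto.newBuilder()
          .setRankBy(ScoringSpecProto.RankingStrategy.Code.USAGE_TYPE1_COUNT)
          .build();
  SearchResultProto results =
      icingSearchEngine.search(
          SearchSpecProto.newBuilder()
              .setQuery("")
              .setTermMatchType(TermMatchType.Code.PREFIX)
              .build(),
          scoringSpec,
          ResultSpecProto.getDefaultInstance());
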
+ @Test
+ public void testCJKTSnippets() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(createEmailTypeConfig()).build();
+ assertStatusOk(
+ icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus());
+
+ // String: "天是蓝的"
+ // ^ ^^ ^
+ // UTF16 idx: 0 1 2 3
+ // Breaks into segments: "天", "是", "蓝", "的"
+ // "The sky is blue"
+ String chinese = "天是蓝的";
+ assertThat(chinese.length()).isEqualTo(4);
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(chinese))
+ .build();
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("是")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+ ResultSpecProto resultSpecProto =
+ ResultSpecProto.newBuilder()
+ .setSnippetSpec(
+ ResultSpecProto.SnippetSpecProto.newBuilder()
+ .setNumToSnippet(Integer.MAX_VALUE)
+ .setNumMatchesPerProperty(Integer.MAX_VALUE))
+ .build();
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto searchResultProto =
+ icingSearchEngine.search(
+ searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ SnippetProto snippetProto = searchResultProto.getResults(0).getSnippet();
+ assertThat(snippetProto.getEntriesList()).hasSize(1);
+ SnippetProto.EntryProto entryProto = snippetProto.getEntries(0);
+ assertThat(entryProto.getPropertyName()).isEqualTo("subject");
+
+ // Get the content for "subject" and see what the match is.
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument();
+ assertThat(resultDocument.getPropertiesList()).hasSize(1);
+ PropertyProto subjectProperty = resultDocument.getProperties(0);
+ assertThat(subjectProperty.getName()).isEqualTo("subject");
+ assertThat(subjectProperty.getStringValuesList()).hasSize(1);
+ String content = subjectProperty.getStringValues(0);
+
+ // Ensure that there is one and only one match within "subject"
+ assertThat(entryProto.getSnippetMatchesList()).hasSize(1);
+ SnippetMatchProto matchProto = entryProto.getSnippetMatches(0);
+
+ int matchStart = matchProto.getExactMatchUtf16Position();
+ int matchEnd = matchStart + matchProto.getExactMatchUtf16Length();
+ assertThat(matchStart).isEqualTo(1);
+ assertThat(matchEnd).isEqualTo(2);
+ String match = content.substring(matchStart, matchEnd);
+ assertThat(match).isEqualTo("是");
+ }
+
+ @Test
+ public void testUtf16MultiByteSnippets() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(createEmailTypeConfig()).build();
+ assertStatusOk(
+ icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus());
+
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^ ^
+ // UTF16 idx: 0 5 10
+ // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄"
+ String text = "𐀀𐀁 𐀂𐀃 𐀄";
+ assertThat(text.length()).isEqualTo(12);
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(text))
+ .build();
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("𐀂")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+ ResultSpecProto resultSpecProto =
+ ResultSpecProto.newBuilder()
+ .setSnippetSpec(
+ ResultSpecProto.SnippetSpecProto.newBuilder()
+ .setNumToSnippet(Integer.MAX_VALUE)
+ .setNumMatchesPerProperty(Integer.MAX_VALUE))
+ .build();
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto searchResultProto =
+ icingSearchEngine.search(
+ searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ SnippetProto snippetProto = searchResultProto.getResults(0).getSnippet();
+ assertThat(snippetProto.getEntriesList()).hasSize(1);
+ SnippetProto.EntryProto entryProto = snippetProto.getEntries(0);
+ assertThat(entryProto.getPropertyName()).isEqualTo("subject");
+
+ // Get the content for "subject" and see what the match is.
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument();
+ assertThat(resultDocument.getPropertiesList()).hasSize(1);
+ PropertyProto subjectProperty = resultDocument.getProperties(0);
+ assertThat(subjectProperty.getName()).isEqualTo("subject");
+ assertThat(subjectProperty.getStringValuesList()).hasSize(1);
+ String content = subjectProperty.getStringValues(0);
+
+ // Ensure that there is one and only one match within "subject"
+ assertThat(entryProto.getSnippetMatchesList()).hasSize(1);
+ SnippetMatchProto matchProto = entryProto.getSnippetMatches(0);
+
+ int matchStart = matchProto.getExactMatchUtf16Position();
+ int matchEnd = matchStart + matchProto.getExactMatchUtf16Length();
+ assertThat(matchStart).isEqualTo(5);
+ assertThat(matchEnd).isEqualTo(9);
+ String match = content.substring(matchStart, matchEnd);
+ assertThat(match).isEqualTo("𐀂𐀃");
+ }
+
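Note: the arithmetic in the two snippet tests above hinges on Java strings being UTF-16: each character in the 𐀀 range is a surrogate pair, so the positions and lengths from SnippetMatchProto can be passed straight to String.substring(). A small, self-contained illustration of why length() is 12 here rather than 7:

  String text = "𐀀𐀁 𐀂𐀃 𐀄";
  // Five supplementary characters (two UTF-16 code units each) plus two spaces.
  int utf16Units = text.length();                         // 12
  int codePoints = text.codePointCount(0, text.length()); // 7
  // getExactMatchUtf16Position()/getExactMatchUtf16Length() are in code
  // units, so substring() (also code-unit based) needs no conversion.
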
+ @Test
+ public void testSearchSuggestions() {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("fo"))
+ .build();
+ DocumentProto emailDocument2 =
+ createEmailDocument("namespace", "uri2").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
+ .build();
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+ assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus());
+
+ SuggestionSpecProto suggestionSpec =
+ SuggestionSpecProto.newBuilder()
+ .setPrefix("f")
+ .setNumToReturn(10)
+ .setScoringSpec(
+ SuggestionScoringSpecProto.newBuilder()
+ .setScoringMatchType(Code.EXACT_ONLY)
+ .build())
+ .build();
+
+ SuggestionResponse response = icingSearchEngine.searchSuggestions(suggestionSpec);
+ assertStatusOk(response.getStatus());
+ assertThat(response.getSuggestionsList()).hasSize(2);
+ assertThat(response.getSuggestions(0).getQuery()).isEqualTo("foo");
+ assertThat(response.getSuggestions(1).getQuery()).isEqualTo("fo");
+ }
+
+ @Test
+ public void testLogging() throws Exception {
+ // Set to INFO
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.INFO)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isFalse();
+
+ // Set to WARNING
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.WARNING)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.WARNING)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isFalse();
+
+ // Set to DEBUG
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.DBG)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE)).isFalse();
+
+ // Set to VERBOSE
+ assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.VERBOSE, (short) 1)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 1)).isTrue();
+ assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 2)).isFalse();
+
+ assertThat(IcingSearchEngine.getLoggingTag()).isNotEmpty();
+ }
- ResetResultProto resetResultProto = icing.reset();
- assertThat(resetResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.OK);
+ private static void assertStatusOk(StatusProto status) {
+ assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK);
}
}
diff --git a/lint-baseline.xml b/lint-baseline.xml
new file mode 100644
index 0000000..5d2b935
--- /dev/null
+++ b/lint-baseline.xml
@@ -0,0 +1,487 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<issues format="6" by="lint 8.1.0-beta02" type="baseline" client="gradle" dependencies="false" name="AGP (8.1.0-beta02)" variant="all" version="8.1.0-beta02">
+
+ <issue
+ id="KotlinPropertyAccess"
+ message="The getter return type (`GetSchemaResultProto`) and setter parameter type (`SchemaProto`) getter and setter methods for property `schema` should have exactly the same type to allow be accessed as a property from Kotlin; see https://android.github.io/kotlin-guides/interop.html#property-prefixes"
+ errorLine1=" public GetSchemaResultProto getSchema() {"
+ errorLine2=" ~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"
+ message="Setter here"/>
+ </issue>
+
+ <issue
+ id="KotlinPropertyAccess"
+ message="The getter return type (`GetSchemaResultProto`) and setter parameter type (`SchemaProto`) getter and setter methods for property `schema` should have exactly the same type to allow be accessed as a property from Kotlin; see https://android.github.io/kotlin-guides/interop.html#property-prefixes"
+ errorLine1=" GetSchemaResultProto getSchema();"
+ errorLine2=" ~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"
+ message="Setter here"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public BreakIteratorBatcher(Locale locale) {"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/BreakIteratorBatcher.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public void setText(String text) {"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/BreakIteratorBatcher.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public int[] next(int batchSize) {"
+ errorLine2=" ~~~~~">
+ <location
+ file="java/src/com/google/android/icing/BreakIteratorBatcher.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean shouldLog(LogSeverity.Code severity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean shouldLog(LogSeverity.Code severity, short verbosity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean setLoggingLevel(LogSeverity.Code severity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" public static boolean setLoggingLevel(LogSeverity.Code severity, short verbosity) {"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngine.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" InitializeResultProto initialize();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema);"
+ errorLine2=" ~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SetSchemaResultProto setSchema(SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments);"
+ errorLine2=" ~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetSchemaResultProto getSchema();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetSchemaTypeResultProto getSchemaType(String schemaType);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetSchemaTypeResultProto getSchemaType(String schemaType);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PutResultProto put(DocumentProto document);"
+ errorLine2=" ~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PutResultProto put(DocumentProto document);"
+ errorLine2=" ~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetResultProto get(String namespace, String uri, GetResultSpecProto getResultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" ReportUsageResultProto reportUsage(UsageReport usageReport);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" ReportUsageResultProto reportUsage(UsageReport usageReport);"
+ errorLine2=" ~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetAllNamespacesResultProto getAllNamespaces();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchResultProto search("
+ errorLine2=" ~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, ScoringSpecProto scoringSpec, ResultSpecProto resultSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchResultProto getNextPage(long nextPageToken);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteResultProto delete(String namespace, String uri);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteResultProto delete(String namespace, String uri);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteResultProto delete(String namespace, String uri);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SuggestionResponse searchSuggestions(SuggestionSpecProto suggestionSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SuggestionResponse searchSuggestions(SuggestionSpecProto suggestionSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByNamespaceResultProto deleteByNamespace(String namespace);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByNamespaceResultProto deleteByNamespace(String namespace);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteBySchemaTypeResultProto deleteBySchemaType(String schemaType);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteBySchemaTypeResultProto deleteBySchemaType(String schemaType);"
+ errorLine2=" ~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByQueryResultProto deleteByQuery(SearchSpecProto searchSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByQueryResultProto deleteByQuery(SearchSpecProto searchSpec);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DeleteByQueryResultProto deleteByQuery("
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo);"
+ errorLine2=" ~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PersistToDiskResultProto persistToDisk(PersistType.Code persistTypeCode);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" PersistToDiskResultProto persistToDisk(PersistType.Code persistTypeCode);"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" OptimizeResultProto optimize();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" GetOptimizeInfoResultProto getOptimizeInfo();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" StorageInfoResultProto getStorageInfo();"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" DebugInfoResultProto getDebugInfo(DebugInfoVerbosity.Code verbosity);"
+ errorLine2=" ~~~~~~~~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+ <issue
+ id="UnknownNullness"
+ message="Unknown nullability; explicitly declare as `@Nullable` or `@NonNull` to improve Kotlin interoperability; see https://developer.android.com/kotlin/interop#nullability_annotations"
+ errorLine1=" ResetResultProto reset();"
+ errorLine2=" ~~~~~~~~~~~~~~~~">
+ <location
+ file="java/src/com/google/android/icing/IcingSearchEngineInterface.java"/>
+ </issue>
+
+</issues>
diff --git a/nativeLib/build.gradle b/nativeLib/build.gradle
index ce7dca7..6b30451 100644
--- a/nativeLib/build.gradle
+++ b/nativeLib/build.gradle
@@ -14,43 +14,6 @@
* limitations under the License.
*/
-buildscript {
- boolean unbundleBuild = (new File('unbundled-build')).exists()
- repositories {
- maven { url '../../../prebuilts/androidx/external' }
- if (unbundleBuild) {
- jcenter()
- }
- }
- dependencies {
- classpath('gradle.plugin.com.google.protobuf:protobuf-gradle-plugin:0.8.8')
- }
-}
-
-apply plugin: 'AndroidXPlugin'
-apply plugin: 'com.android.library'
-
-android {
- defaultConfig {
- externalNativeBuild {
- cmake {
- cppFlags "-std=c++17"
- arguments "-DCMAKE_VERBOSE_MAKEFILE=ON"
- targets "icing"
- }
- }
- }
-
- sourceSets {
- main {
- manifest.srcFile '../AndroidManifest.xml'
- }
- }
-
- externalNativeBuild {
- cmake {
- version '3.10.2'
- path '../CMakeLists.txt'
- }
- }
-}
+// TODO(b/161205849): We've had to move libicing.so compilation into appsearch:appsearch to get
+// it included into the exported aar. Find a proper solution for bundling libicing.so into
+// appsearch-release.aar and move compilation of libicing.so back into the external/icing tree.
diff --git a/proto/icing/index/numeric/wildcard-property-storage.proto b/proto/icing/index/numeric/wildcard-property-storage.proto
new file mode 100644
index 0000000..7f02b77
--- /dev/null
+++ b/proto/icing/index/numeric/wildcard-property-storage.proto
@@ -0,0 +1,22 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+// Next tag: 2
+message WildcardPropertyStorage {
+ repeated string property_entries = 1;
+}
diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto
new file mode 100644
index 0000000..90d1981
--- /dev/null
+++ b/proto/icing/proto/debug.proto
@@ -0,0 +1,137 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+import "icing/proto/schema.proto";
+import "icing/proto/status.proto";
+import "icing/proto/storage.proto";
+
+option java_package = "com.google.android.icing.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+message LogSeverity {
+ enum Code {
+ VERBOSE = 0;
+ // Unable to use DEBUG at this time because it breaks YTM's iOS tests
+ // cs/?q=%22-DDEBUG%3D1%22%20f:%2FYoutubeMusic%20f:blueprint&ssfr=1
+ DBG = 1;
+ INFO = 2;
+ WARNING = 3;
+ ERROR = 4;
+ FATAL = 5;
+ }
+}
+
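Note: a common pattern on top of this severity enum, sketched here with a hypothetical buildDump() helper, is to gate expensive log-message construction behind shouldLog() — the Java API exercised in testLogging above:

  // Hedged sketch; buildDump() is a hypothetical, expensive-to-compute
  // helper, and Log is android.util.Log.
  if (IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 1)) {
    Log.v(IcingSearchEngine.getLoggingTag(), "index dump: " + buildDump());
  }
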
+message DebugInfoVerbosity {
+ enum Code {
+ // Simplest debug information.
+ BASIC = 0;
+ // More detailed debug information as indicated in the field documentation
+ // below.
+ DETAILED = 1;
+ }
+}
+
+// Next tag: 4
+message IndexDebugInfoProto {
+ // Storage information of the index.
+ optional IndexStorageInfoProto index_storage_info = 1;
+
+ // A formatted string containing the following information:
+ // lexicon_info: Information about the main lexicon
+ // last_added_document_id: Last added document id
+ // flash_index_storage_info: If verbosity = DETAILED, return information about
+ // the posting list storage
+ //
+ // No direct contents from user-provided documents will ever appear in this
+ // string.
+ optional string main_index_info = 2;
+
+ // A formatted string containing the following information:
+ // curr_size: Current number of hits
+ // hit_buffer_size: The maximum possible number of hits
+ // last_added_document_id: Last added document id
+ // searchable_end: The first position in the hit buffer that is not sorted
+ // yet, or curr_size if all hits are sorted
+ // index_crc: The most recent checksum of the lite index, by calling
+ // LiteIndex::ComputeChecksum()
+ // lexicon_info: Information about the lite lexicon
+ //
+ // No direct contents from user-provided documents will ever appear in this
+ // string.
+ optional string lite_index_info = 3;
+}
+
+// Next tag: 4
+message DocumentDebugInfoProto {
+ // Storage information of the document store.
+ optional DocumentStorageInfoProto document_storage_info = 1;
+
+ // The most recent checksum of the document store, by calling
+ // DocumentStore::ComputeChecksum().
+ optional uint32 crc = 2;
+
+ message CorpusInfo {
+ optional string namespace = 1;
+ optional string schema = 2;
+ optional uint32 total_documents = 3;
+ optional uint32 total_token = 4;
+ }
+
+ // If verbosity = DETAILED, return the total number of documents and tokens in
+ // each (namespace, schema type) pair.
+ // Note that deleted and expired documents are skipped in the output.
+ repeated CorpusInfo corpus_info = 3;
+}
+
+// Next tag: 3
+message SchemaDebugInfoProto {
+ // Copy of the SchemaProto if it has been set in the schema store.
+ // Modifying this does not affect the Schema that IcingSearchEngine holds.
+ optional SchemaProto schema = 1;
+
+ // The most recent checksum of the schema store, by calling
+ // SchemaStore::ComputeChecksum().
+ optional uint32 crc = 2;
+}
+
+// Next tag: 4
+message DebugInfoProto {
+ // Debug information of the index.
+ optional IndexDebugInfoProto index_info = 1;
+
+ // Debug information of the document store.
+ optional DocumentDebugInfoProto document_info = 2;
+
+ // Debug information of the schema store.
+ optional SchemaDebugInfoProto schema_info = 3;
+}
+
+// Next tag: 3
+message DebugInfoResultProto {
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
+ // INTERNAL on IO errors, crc compute error.
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+
+ // Debug information for Icing.
+ optional DebugInfoProto debug_info = 2;
+}
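Note: as a consumption sketch (getter names follow standard protoc Java codegen), a client could surface the per-corpus counts that only appear at DETAILED verbosity:

  DebugInfoResultProto result =
      icingSearchEngine.getDebugInfo(DebugInfoVerbosity.Code.DETAILED);
  if (result.getStatus().getCode() == StatusProto.Code.OK) {
    for (DocumentDebugInfoProto.CorpusInfo corpus :
        result.getDebugInfo().getDocumentInfo().getCorpusInfoList()) {
      // corpus_info is only populated when verbosity = DETAILED.
      System.out.println(
          corpus.getNamespace() + "/" + corpus.getSchema() + ": "
              + corpus.getTotalDocuments() + " docs, "
              + corpus.getTotalToken() + " tokens");
    }
  }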
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index 1caf169..1a501e7 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -16,6 +16,7 @@ syntax = "proto2";
package icing.lib;
+import "icing/proto/logging.proto";
import "icing/proto/status.proto";
option java_package = "com.google.android.icing.proto";
@@ -23,7 +24,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Defines a unit of data understood by the IcingSearchEngine.
-// Next tag: 9
+// Next tag: 10
message DocumentProto {
// REQUIRED: Namespace that this Document resides in.
// Namespaces can affect read/write permissions.
@@ -49,11 +50,6 @@ message DocumentProto {
// already defined in the schema for this Document's schema_type.
repeated PropertyProto properties = 5;
- // OPTIONAL: Properties that will not be validated against the schema,
- // indexed, or be searchable. The properties will be stored in the Documents,
- // but never looked at by Icing.
- repeated PropertyProto custom_properties = 6;
-
// OPTIONAL: Score of the document which could be used during search result
// ranking. Negative values will lead to validation errors. The default is the
// lowest score 0.
@@ -68,6 +64,17 @@ message DocumentProto {
// TODO(cassiewang): Benchmark if fixed64 or some other proto type is better
// in terms of space/time efficiency. Both for ttl_ms and timestamp fields
optional int64 ttl_ms = 8 [default = 0];
+
+ // Defines document level data that's generated internally by Icing.
+ message InternalFields {
+ // The length of the document as a count of tokens (or terms) in all indexed
+ // text properties. This field is used in the computation of BM25F relevance
+ // score.
+ optional int32 length_in_tokens = 1;
+ }
+ optional InternalFields internal_fields = 9;
+
+ reserved 6;
}
// Holds a property field of the Document.
@@ -88,7 +95,7 @@ message PropertyProto {
}
// Result of a call to IcingSearchEngine.Put
-// Next tag: 2
+// Next tag: 3
message PutResultProto {
// Status code can be one of:
// OK
@@ -102,6 +109,12 @@ message PutResultProto {
// TODO(b/147699081): Fix error codes: +ABORTED
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats of the function call. Inside PutDocumentStatsProto, the function
+ // call latency 'latency_ms' will always be populated. The other fields will
+ // be accurate only when the status above is OK. See logging.proto for
+ // details.
+ optional PutDocumentStatsProto put_document_stats = 2;
}
// Result of a call to IcingSearchEngine.Get
@@ -139,7 +152,7 @@ message GetAllNamespacesResultProto {
}
// Result of a call to IcingSearchEngine.Delete
-// Next tag: 2
+// Next tag: 3
message DeleteResultProto {
// Status code can be one of:
// OK
@@ -152,10 +165,13 @@ message DeleteResultProto {
// TODO(b/147699081): Fix error codes: +ABORTED.
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteByNamespace
-// Next tag: 2
+// Next tag: 3
message DeleteByNamespaceResultProto {
// Status code can be one of:
// OK
@@ -168,10 +184,13 @@ message DeleteByNamespaceResultProto {
// TODO(b/147699081): Fix error codes: +ABORTED.
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteBySchemaType
-// Next tag: 2
+// Next tag: 3
message DeleteBySchemaTypeResultProto {
// Status code can be one of:
// OK
@@ -184,4 +203,41 @@ message DeleteBySchemaTypeResultProto {
// TODO(b/147699081): Fix error codes: +ABORTED.
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional DeleteStatsProto delete_stats = 2;
+}
+
+// Result of a call to IcingSearchEngine.DeleteByQuery
+// Next tag: 5
+message DeleteByQueryResultProto {
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION
+ // NOT_FOUND
+ // INTERNAL
+ //
+ // See status.proto for more details.
+ //
+ // TODO(b/147699081): Fix error codes: +ABORTED.
+ // go/icing-library-apis.
+ optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional DeleteByQueryStatsProto delete_by_query_stats = 3;
+
+ // Used by DeleteByQueryResultProto to return information about deleted
+ // documents.
+ message DocumentGroupInfo {
+ optional string namespace = 1;
+ optional string schema = 2;
+ repeated string uris = 3;
+ }
+
+ // Additional return message that shows the uris of the deleted documents, if
+ // users set return_deleted_document_info to true.
+ // The result is grouped by the corresponding namespace and type.
+ repeated DocumentGroupInfo deleted_documents = 4;
+
+ reserved 2;
}
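Note: since deleted_documents is grouped by (namespace, schema), a caller reconciling its own state would iterate groups and then URIs. A minimal sketch using the standard generated Java accessors; evictFromCache() is a hypothetical caller-side helper:

  DeleteByQueryResultProto result =
      icingSearchEngine.deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ true);
  for (DeleteByQueryResultProto.DocumentGroupInfo group :
      result.getDeletedDocumentsList()) {
    for (String uri : group.getUrisList()) {
      // Each deleted document is identified by (namespace, uri);
      // group.getSchema() names its type.
      evictFromCache(group.getNamespace(), uri); // hypothetical helper
    }
  }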
diff --git a/proto/icing/proto/document_wrapper.proto b/proto/icing/proto/document_wrapper.proto
index e8eb992..929ee33 100644
--- a/proto/icing/proto/document_wrapper.proto
+++ b/proto/icing/proto/document_wrapper.proto
@@ -20,7 +20,6 @@ import "icing/proto/document.proto";
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// DocumentWrapper as a wrapper of the user-facing DocumentProto is meant to
@@ -30,6 +29,5 @@ option objc_class_prefix = "ICNG";
message DocumentWrapper {
optional DocumentProto document = 1;
- // Indicates if the document is marked as deleted
- optional bool deleted = 2;
+ reserved 2;
}
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index eac88e6..9dd9e88 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -16,33 +16,20 @@ syntax = "proto2";
package icing.lib;
+import "icing/proto/logging.proto";
import "icing/proto/status.proto";
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
-// Next tag: 5
+// Next tag: 16
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
// the index saved by the last instance.
optional string base_dir = 1;
- // The maximum number of tokens to be allowed per document. If a document
- // exceeds this number of tokens, then only the first max_tokens_per_doc
- // will be indexed.
- //
- // Clients may use this value to prevent the possibility of a select few
- // documents from exhausting limits in the index that are shared between all
- // documents (ie max allowed index size).
- //
- // Valid values: [1, INT_MAX], Current default is 1/5 of the default of
- // max_document_size.
- // Optional.
- optional int32 max_tokens_per_doc = 2 [default = 13107];
-
// The maximum allowable token length. All tokens in excess of this size
// will be truncated to max_token_length before being indexed.
//
@@ -70,10 +57,90 @@ message IcingSearchEngineOptions {
// Valid values: [1, INT_MAX]
// Optional.
optional int32 index_merge_size = 4 [default = 1048576]; // 1 MiB
+
+ // Whether to use the namespace id or the namespace name to build up the
+ // fingerprint for document_key_mapper_ and corpus_mapper_ in the document
+ // store.
+ // TODO(b/259969017) Flip the default value of this flag to true at the time
+ // when we switch to use persistent hash map for document_key_mapper_ so that
+ // we just need one reconstruction of the internal mappers.
+ optional bool document_store_namespace_id_fingerprint = 5;
+
+ // The threshold of the percentage of invalid documents to rebuild index
+ // during optimize, i.e. we rebuild index if and only if
+ // |invalid_documents| / |all_documents| >= optimize_rebuild_index_threshold
+ //
+ // Rebuilding the index could be faster than optimizing the index if we have
+ // removed most of the documents.
+ // Based on benchmarks, 85%~95% seems to be a good threshold for most cases.
+ //
+ // Defaults to 0 for better rollout of the new index optimize.
+ optional float optimize_rebuild_index_threshold = 6 [default = 0.0];
+
+ // Level of compression, NO_COMPRESSION = 0, BEST_SPEED = 1,
+ // BEST_COMPRESSION = 9
+ // Valid values: [0, 9]
+ // Optional.
+ optional int32 compression_level = 7 [default = 3];
+
+ // OPTIONAL: Whether to allow circular references between schema types for
+ // the schema definition.
+ //
+ // Even when set to true, circular references are still not allowed in the
+ // following cases:
+ // 1. All edges of a cycle have index_nested_properties=true
+ // 2. One of the types in the cycle has a joinable property, or depends on
+ // a type with a joinable property.
+ // This is because such a cycle would lead to an infinite number of
+ // indexed/joinable properties.
+ //
+ // The default value is false.
+ optional bool allow_circular_schema_definitions = 8;
+
+ // Whether to memory-map the maximum possible file size for FileBackedVector
+ // before growing the actual file size.
+ optional bool pre_mapping_fbv = 9;
+
+ // Whether to use the persistent hash map as the key mapper (if false, then
+ // fall back to the dynamic trie key mapper).
+ optional bool use_persistent_hash_map = 10;
+
+ // Integer index bucket split threshold.
+ optional int32 integer_index_bucket_split_threshold = 11 [default = 65536];
+
+ // Whether Icing should sort and merge its lite index HitBuffer unsorted tail
+ // at indexing time.
+ //
+ // If set to true, the HitBuffer will be sorted at indexing time after
+ // exceeding the sort threshold. If false, the HitBuffer will be sorted at
+ // querying time, before the first query after inserting new elements into the
+ // HitBuffer.
+ //
+ // The default value is false.
+ optional bool lite_index_sort_at_indexing = 12;
+
+ // Size (in bytes) at which Icing's lite index should sort and merge the
+ // HitBuffer's unsorted tail into the sorted head for sorting at indexing
+ // time. Size specified here is the maximum byte size to allow for the
+ // unsorted tail section.
+ //
+ // Setting a lower sort size reduces querying latency at the expense of
+ // indexing latency.
+ optional int32 lite_index_sort_size = 13 [default = 8192]; // 8 KiB
+
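+ // Whether to use the new qualified id join index.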
+ optional bool use_new_qualified_id_join_index = 14;
+
+ // Whether to build the metadata hits used for property existence check, which
+ // is required to support the hasProperty function in advanced query.
+ //
+ // TODO(b/309826655): Implement the feature flag derived files rebuild
+ // mechanism to handle index rebuild, instead of using index's magic value.
+ optional bool build_property_existence_metadata_hits = 15;
+
+ reserved 2;
}
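A minimal configuration sketch for the options above, assuming the standard proto2 C++ generated API and a generated header at icing/proto/initialize.pb.h (neither appears in this diff):

  #include <string>

  #include "icing/proto/initialize.pb.h"  // assumed generated header path

  icing::lib::IcingSearchEngineOptions MakeOptions(const std::string& dir) {
    icing::lib::IcingSearchEngineOptions options;
    options.set_base_dir(dir);                      // required
    options.set_compression_level(3);               // valid range [0, 9]
    options.set_lite_index_sort_at_indexing(true);  // sort HitBuffer at indexing
    options.set_lite_index_sort_size(8192);         // 8 KiB unsorted tail
    return options;
  }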
// Result of a call to IcingSearchEngine.Initialize
-// Next tag: 2
+// Next tag: 3
message InitializeResultProto {
// Status code can be one of:
// OK
@@ -88,6 +155,12 @@ message InitializeResultProto {
// go/icing-library-apis.
optional StatusProto status = 1;
+ // Stats of the function call. Inside InitializeStatsProto, the function call
+ // latency 'latency_ms' will always be populated. The other fields will be
+ // accurate only when the status above is OK or WARNING_DATA_LOSS. See
+ // logging.proto for details.
+ optional InitializeStatsProto initialize_stats = 2;
+
// TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
// go/icing-library-apis.
}
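Since latency_ms is always populated but the other stats are accurate only on OK or WARNING_DATA_LOSS, a caller might consume the result like this sketch (generated header path assumed):

  #include <iostream>

  #include "icing/proto/initialize.pb.h"  // assumed generated header path

  void LogInitResult(const icing::lib::InitializeResultProto& result) {
    std::cout << "init status code: " << result.status().code() << std::endl;
    // Always populated, per the comment on initialize_stats.
    std::cout << "init latency_ms: "
              << result.initialize_stats().latency_ms() << std::endl;
  }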
diff --git a/proto/icing/proto/internal/optimize.proto b/proto/icing/proto/internal/optimize.proto
new file mode 100644
index 0000000..4ed3d73
--- /dev/null
+++ b/proto/icing/proto/internal/optimize.proto
@@ -0,0 +1,29 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+option java_package = "com.google.android.icing.internal.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// A status that is saved internally in Icing to track information about how
+// often Optimize runs.
+// Next tag: 2
+message OptimizeStatusProto {
+ // The epoch time at which the last successful optimize ran.
+ optional int64 last_successful_optimize_run_time_ms = 1;
+}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
new file mode 100644
index 0000000..fcedeed
--- /dev/null
+++ b/proto/icing/proto/logging.proto
@@ -0,0 +1,364 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+import "icing/proto/scoring.proto";
+
+option java_package = "com.google.android.icing.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// Stats of the top-level function IcingSearchEngine::Initialize().
+// Next tag: 14
+message InitializeStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // The cause of IcingSearchEngine recovering from a previous bad state during
+ // initialization.
+ enum RecoveryCause {
+ // No recovery happened.
+ NONE = 0;
+
+ // Data loss in ground truth.
+ DATA_LOSS = 1;
+
+ // Data in index is inconsistent with ground truth.
+ INCONSISTENT_WITH_GROUND_TRUTH = 2;
+
+ // Changes were made to the schema, but possibly not fully applied to the
+ // document store and the index, requiring a recovery.
+ SCHEMA_CHANGES_OUT_OF_SYNC = 3;
+
+ // Random I/O errors.
+ IO_ERROR = 4;
+
+ // The document log is using legacy format.
+ LEGACY_DOCUMENT_LOG_FORMAT = 5;
+
+ // The current code version is different from existing data version.
+ VERSION_CHANGED = 6;
+
+ // Any dependencies have changed.
+ DEPENDENCIES_CHANGED = 7;
+ }
+
+ // Possible recovery causes for document store:
+ // - DATA_LOSS
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause document_store_recovery_cause = 2;
+
+ // Possible recovery causes for index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause index_restoration_cause = 3;
+
+ // Possible recovery causes for schema store:
+ // - IO_ERROR
+ optional RecoveryCause schema_store_recovery_cause = 4;
+
+ // Time used to recover the document store.
+ optional int32 document_store_recovery_latency_ms = 5;
+
+ // Time used to restore the index.
+ optional int32 index_restoration_latency_ms = 6;
+
+ // Time used to restore the schema store.
+ optional int32 schema_store_recovery_latency_ms = 7;
+
+ // Status regarding how much data is lost during the initialization.
+ enum DocumentStoreDataStatus {
+ // Document store is successfully initialized or fully recovered.
+ NO_DATA_LOSS = 0;
+
+ // Ground truth data is partially lost.
+ PARTIAL_LOSS = 1;
+
+ // Ground truth data is completely lost.
+ COMPLETE_LOSS = 2;
+ }
+ optional DocumentStoreDataStatus document_store_data_status = 8;
+
+ // Number of documents currently in the document store. These may
+ // include alive, deleted, and expired documents.
+ optional int32 num_documents = 9;
+
+ // Number of schema types currently in schema store.
+ optional int32 num_schema_types = 10;
+
+ // Number of consecutive initialization failures that immediately preceded
+ // this initialization.
+ optional int32 num_previous_init_failures = 11;
+
+ // Possible recovery causes for integer index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause integer_index_restoration_cause = 12;
+
+ // Possible recovery causes for qualified id join index:
+ // - INCONSISTENT_WITH_GROUND_TRUTH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
+ // - IO_ERROR
+ optional RecoveryCause qualified_id_join_index_restoration_cause = 13;
+}
+
+// Stats of the top-level function IcingSearchEngine::Put().
+// Next tag: 12
+message PutDocumentStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // Time used to store the document.
+ optional int32 document_store_latency_ms = 2;
+
+ // Time used to index the document.
+ optional int32 index_latency_ms = 3;
+
+ // Time used to merge the indices.
+ optional int32 index_merge_latency_ms = 4;
+
+ // Document size in bytes.
+ optional int32 document_size = 5;
+
+ message TokenizationStats {
+ // Number of tokens added to the index.
+ optional int32 num_tokens_indexed = 1;
+
+ // Number of metadata tokens added to the index, which can only be added by
+ // PropertyExistenceIndexingHandler currently.
+ optional int32 num_metadata_tokens_indexed = 3;
+
+ reserved 2;
+ }
+ optional TokenizationStats tokenization_stats = 6;
+
+ // Time used to index all indexable string terms and property existence
+ // metadata terms in the document. It does not include the time to merge
+ // indices or the time to sort the lite index.
+ optional int32 term_index_latency_ms = 7;
+
+ // Time used to index all indexable integers in the document.
+ optional int32 integer_index_latency_ms = 8;
+
+ // Time used to index all qualified id join strings in the document.
+ optional int32 qualified_id_join_index_latency_ms = 9;
+
+ // Time used to sort the LiteIndex's HitBuffer.
+ optional int32 lite_index_sort_latency_ms = 10;
+
+ // Time used to index all metadata terms in the document, which can only be
+ // added by PropertyExistenceIndexingHandler currently.
+ optional int32 metadata_term_index_latency_ms = 11;
+}
+
+// Stats of the top-level function IcingSearchEngine::Search() and
+// IcingSearchEngine::GetNextPage().
+// Next tag: 26
+message QueryStatsProto {
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // The UTF-8 length of the query string
+ optional int32 query_length = 16;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Number of terms in the query string.
+ optional int32 num_terms = 1;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Number of namespaces filtered.
+ optional int32 num_namespaces_filtered = 2;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Number of schema types filtered.
+ optional int32 num_schema_types_filtered = 3;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Strategy of scoring and ranking.
+ optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 4;
+
+ // Whether the function call is querying the first page. If it’s
+ // not, Icing will fetch the results from cache so that some steps
+ // may be skipped.
+ optional bool is_first_page = 5;
+
+ // The requested number of results in one page.
+ optional int32 requested_page_size = 6;
+
+ // The actual number of results returned in the current page.
+ optional int32 num_results_returned_current_page = 7;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Number of documents scored.
+ optional int32 num_documents_scored = 8;
+
+ // How many of the results in the page returned were snippeted.
+ optional int32 num_results_with_snippets = 15;
+
+ // Overall time used for the function call.
+ optional int32 latency_ms = 10;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Time used to parse the query, including 2 parts: tokenizing and
+ // transforming tokens into an iterator tree.
+ optional int32 parse_query_latency_ms = 11;
+
+ // TODO(b/305098009): deprecate. Use parent_search_stats instead.
+ // Time used to score the raw results.
+ optional int32 scoring_latency_ms = 12;
+
+ // Time used to rank the scored results.
+ optional int32 ranking_latency_ms = 13;
+
+ // Time used to fetch the document protos. Note that it includes the
+ // time to snippet if ‘has_snippets’ is true.
+ optional int32 document_retrieval_latency_ms = 14;
+
+ // Time passed while waiting to acquire the lock before query execution.
+ optional int32 lock_acquisition_latency_ms = 17;
+
+ // Timestamp taken just before sending proto across the JNI boundary from
+ // native to java side.
+ optional int64 native_to_java_start_timestamp_ms = 18;
+
+ // Time used to send protos across the JNI boundary from java to native side.
+ optional int32 java_to_native_jni_latency_ms = 19;
+
+ // Time used to send protos across the JNI boundary from native to java side.
+ optional int32 native_to_java_jni_latency_ms = 20;
+
+ // The native latency due to the join operation.
+ optional int32 join_latency_ms = 21;
+
+ // Number of joined results returned in the current page.
+ optional int32 num_joined_results_returned_current_page = 22;
+
+ // Whether the query contains a join or not.
+ optional bool is_join_query = 23;
+
+ // Stats of the search. Only valid for first page.
+ // Next tag: 13
+ message SearchStats {
+ // The UTF-8 length of the query string
+ optional int32 query_length = 1;
+
+ // Number of terms in the query string.
+ optional int32 num_terms = 2;
+
+ // Number of namespaces filtered.
+ optional int32 num_namespaces_filtered = 3;
+
+ // Number of schema types filtered.
+ optional int32 num_schema_types_filtered = 4;
+
+ // Strategy of scoring and ranking.
+ optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 5;
+
+ // Number of documents scored.
+ optional int32 num_documents_scored = 6;
+
+ // Time used to parse the query, including 2 parts: tokenizing and
+ // transforming tokens into an iterator tree.
+ optional int32 parse_query_latency_ms = 7;
+
+ // Time used to score the raw results.
+ optional int32 scoring_latency_ms = 8;
+
+ // Whether the query contains a numeric search or not.
+ optional bool is_numeric_query = 9;
+
+ // Number of hits fetched by lite index before applying any filters.
+ optional int32 num_fetched_hits_lite_index = 10;
+
+ // Number of hits fetched by main index before applying any filters.
+ optional int32 num_fetched_hits_main_index = 11;
+
+ // Number of hits fetched by integer index before applying any filters.
+ optional int32 num_fetched_hits_integer_index = 12;
+ }
+
+ // Search stats for parent. Only valid for first page.
+ optional SearchStats parent_search_stats = 24;
+
+ // Search stats for child.
+ optional SearchStats child_search_stats = 25;
+
+ reserved 9;
+}
+
+// Stats of the top-level functions IcingSearchEngine::Delete,
+// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType.
+// Next tag: 4
+message DeleteStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ message DeleteType {
+ enum Code {
+ // Default. Should never be used.
+ UNKNOWN = 0;
+
+ // Delete one document.
+ SINGLE = 1;
+
+ // Delete by query. This value is deprecated.
+ // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto
+ // rather than a DeleteStatsProto.
+ DEPRECATED_QUERY = 2 [deprecated = true];
+
+ // Delete by namespace.
+ NAMESPACE = 3;
+
+ // Delete by schema type.
+ SCHEMA_TYPE = 4;
+ }
+ }
+ optional DeleteType.Code delete_type = 2;
+
+ // Number of documents deleted by this call.
+ optional int32 num_documents_deleted = 3;
+}
+
+// Stats of the top-level function IcingSearchEngine::DeleteByQuery.
+// Next tag: 9
+message DeleteByQueryStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // Number of documents deleted by this call.
+ optional int32 num_documents_deleted = 2;
+
+ // The UTF-8 length of the query string
+ optional int32 query_length = 3;
+
+ // Number of terms in the query string.
+ optional int32 num_terms = 4;
+
+ // Number of namespaces filtered.
+ optional int32 num_namespaces_filtered = 5;
+
+ // Number of schema types filtered.
+ optional int32 num_schema_types_filtered = 6;
+
+ // Time used to parse the query, including 2 parts: tokenizing and
+ // transforming tokens into an iterator tree.
+ optional int32 parse_query_latency_ms = 7;
+
+ // Time used to delete each document.
+ optional int32 document_removal_latency_ms = 8;
+}
diff --git a/proto/icing/proto/optimize.proto b/proto/icing/proto/optimize.proto
index 1baa64c..675f980 100644
--- a/proto/icing/proto/optimize.proto
+++ b/proto/icing/proto/optimize.proto
@@ -23,7 +23,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Result of a call to IcingSearchEngine.Optimize
-// Next tag: 2
+// Next tag: 3
message OptimizeResultProto {
// Status code can be one of:
// OK
@@ -35,12 +35,13 @@ message OptimizeResultProto {
// See status.proto for more details.
optional StatusProto status = 1;
+ optional OptimizeStatsProto optimize_stats = 2;
// TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
// go/icing-library-apis.
}
// Result of a call to IcingSearchEngine.GetOptimizeInfo
-// Next tag: 4
+// Next tag: 5
message GetOptimizeInfoResultProto {
// Status code can be one of:
// OK
@@ -57,4 +58,54 @@ message GetOptimizeInfoResultProto {
// Estimated bytes that could be recovered. The exact size per document isn't
// tracked, so this is based off an average document size.
optional int64 estimated_optimizable_bytes = 3;
+
+ // The amount of time since the last optimize ran.
+ optional int64 time_since_last_optimize_ms = 4;
+}
+
+// Next tag: 13
+message OptimizeStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // Time used to optimize the document store.
+ optional int32 document_store_optimize_latency_ms = 2;
+
+ // Time used to restore the index.
+ optional int32 index_restoration_latency_ms = 3;
+
+ // Number of documents before the optimization.
+ optional int32 num_original_documents = 4;
+
+ // Number of documents deleted.
+ optional int32 num_deleted_documents = 5;
+
+ // Number of documents expired.
+ optional int32 num_expired_documents = 6;
+
+ // Size of storage before the optimize.
+ optional int64 storage_size_before = 7;
+
+ // Size of storage after the optimize.
+ optional int64 storage_size_after = 8;
+
+ // The amount of time since the last optimize ran.
+ optional int64 time_since_last_optimize_ms = 9;
+
+ enum IndexRestorationMode {
+ // The index has been translated in place to match the optimized document
+ // store.
+ INDEX_TRANSLATION = 0;
+ // The index has been rebuilt from scratch during optimization. This could
+ // happen when we received a DATA_LOSS error from OptimizeDocumentStore, when
+ // Index::Optimize failed, or when rebuilding was estimated to be faster.
+ FULL_INDEX_REBUILD = 1;
+ }
+ optional IndexRestorationMode index_restoration_mode = 10;
+
+ // Number of namespaces before the optimization.
+ optional int32 num_original_namespaces = 11;
+
+ // Number of namespaces deleted.
+ optional int32 num_deleted_namespaces = 12;
}
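As a small worked example of the storage fields above, the space reclaimed by an optimize run is simply storage_size_before - storage_size_after, clamped at zero (a sketch; generated header path assumed):

  #include <cstdint>

  #include "icing/proto/optimize.pb.h"  // assumed generated header path

  // Returns the bytes reclaimed by an optimize run, or 0 if storage grew.
  int64_t BytesReclaimed(const icing::lib::OptimizeStatsProto& stats) {
    int64_t before = stats.storage_size_before();
    int64_t after = stats.storage_size_after();
    return before > after ? before - after : 0;
  }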
diff --git a/proto/icing/proto/persist.proto b/proto/icing/proto/persist.proto
index 77cf987..8d6b372 100644
--- a/proto/icing/proto/persist.proto
+++ b/proto/icing/proto/persist.proto
@@ -22,6 +22,28 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
+// The type of persistence guarantee that PersistToDisk should provide.
+// Next tag: 3
+message PersistType {
+ enum Code {
+ // Default. Should never be used.
+ UNKNOWN = 0;
+
+ // Only persist the ground truth. A successful PersistToDisk(LITE) should
+ // ensure that no data is lost the next time Icing initializes. This
+ // should be called after each batch of mutations.
+ LITE = 1;
+
+ // Persists all data in internal Icing components. A successful
+ // PersistToDisk(FULL) should not only ensure no data loss like
+ // PersistToDisk(LITE), but also prevent the need to recover internal data
+ // structures the next time Icing initializes. This should be called at
+ // some point before the app terminates.
+ FULL = 2;
+ }
+ optional Code code = 1;
+}
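The LITE/FULL contract above suggests call sites like the following sketch. It assumes IcingSearchEngine exposes PersistToDisk(PersistType::Code) and an icing/icing-search-engine.h header, neither of which is shown in this diff:

  #include "icing/icing-search-engine.h"  // assumed header path
  #include "icing/proto/persist.pb.h"

  void FlushAfterMutationBatch(icing::lib::IcingSearchEngine& icing) {
    // LITE after each batch of mutations: no data loss on the next init.
    icing.PersistToDisk(icing::lib::PersistType::LITE);
  }

  void FlushBeforeShutdown(icing::lib::IcingSearchEngine& icing) {
    // FULL before the app terminates: additionally avoids recovering
    // internal data structures on the next init.
    icing.PersistToDisk(icing::lib::PersistType::FULL);
  }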
+
// Result of a call to IcingSearchEngine.Persist
// Next tag: 2
message PersistToDiskResultProto {
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index 3a7ee5d..c716dba 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -34,7 +34,7 @@ option objc_class_prefix = "ICNG";
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
-// Next tag: 5
+// Next tag: 7
message SchemaTypeConfigProto {
// REQUIRED: Named type that uniquely identifies the structured, logical
// schema being defined.
@@ -51,19 +51,32 @@ message SchemaTypeConfigProto {
// easier.
repeated PropertyConfigProto properties = 4;
+ // Version is an arbitrary number that the client may use to keep track of
+ // different incarnations of the schema. Icing library imposes no requirements
+ // on this field and will not validate it in any way. If a client calls
+ // SetSchema with a schema that contains one or more new version numbers, then
+ // those version numbers will be updated so long as the SetSchema call
+ // succeeds. Clients are free to leave the version number unset, in which case
+ // it will default to 0.
+ optional int32 version = 5;
+
+ // An experimental field that makes the type a subtype of parent_types, which
+ // enables parent_types to be interpreted as its subtypes in the context of
+ // the Search APIs, including schema type filters and projections specified in
+ // TypePropertyMask.
+ repeated string parent_types = 6;
+
reserved 2, 3;
}
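A sketch of how a client might set the new version and parent_types fields (standard proto2 C++ generated API assumed; "Email" and "Message" are hypothetical type names):

  #include "icing/proto/schema.pb.h"  // assumed generated header path

  icing::lib::SchemaTypeConfigProto MakeEmailType() {
    icing::lib::SchemaTypeConfigProto type;
    type.set_schema_type("Email");
    // Arbitrary client-managed number; Icing stores it but never validates it.
    type.set_version(2);
    // Experimental: queries and projections against "Message" may then be
    // interpreted to cover "Email" as well.
    type.add_parent_types("Message");
    return type;
  }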
-// Describes how a single property should be indexed.
+// Describes how a string property should be indexed.
// Next tag: 3
-message IndexingConfig {
+message StringIndexingConfig {
// Indicates how the content of this property should be matched in the index.
//
// TermMatchType.Code=UNKNOWN
// Content in this property will not be tokenized or indexed. Useful if the
- // data type is not made up of terms (e.g. DOCUMENT or BYTES type). All the
- // properties inside the nested property won't be indexed regardless of the
- // value of the term_match_type field for the nested properties.
+ // data type is not indexable. See schema-util for details.
//
// TermMatchType.Code=EXACT_ONLY
// Content in this property should only be returned for queries matching the
@@ -79,20 +92,123 @@ message IndexingConfig {
message TokenizerType {
enum Code {
// It is only valid for tokenizer_type to be 'NONE' if the data type is
- // DOCUMENT.
+ // not indexed.
NONE = 0;
// Tokenization for plain text.
PLAIN = 1;
+
+ // Tokenizes text verbatim. This means no normalization or segmentation
+ // is applied to string values that are tokenized using this type.
+ // Therefore, the output token is equivalent to the raw string text. For
+ // example, "Hello, world!" would be tokenized as "Hello, world!",
+ // preserving punctuation and capitalization and not splitting the string
+ // at the space.
+ VERBATIM = 2;
+
+ // Tokenizes text as an email address. This means it will tokenize a
+ // string into multiple emails, and further tokenize those into parts of
+ // an email address. These parts include the local address, host
+ // components, local components, as well as the name and comments. For
+ // example, "User (comment) <user@domain.com>" would be tokenized into a
+ // "User" name token, a "comment" comment token, a "user" local address, a
+ // "user" local component token, a "domain" host component token, a "com"
+ // host component token, a "user@domain.com" address token, and the entire
+ // original string as an rfc822 token.
+ // See more here: https://datatracker.ietf.org/doc/html/rfc822
+ RFC822 = 3;
+
+ // Tokenizes text as a URL. This tokenizes a url string into a
+ // token for each component in the url, as well as any significant
+ // url suffixes. For example,
+ // https://www.google.com/path/subpath?query#ref would be tokenized into a
+ // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
+ // tokens "path", "subpath"; a query token "query"; a reference token
+ // "ref"; and 3 suffix tokens
+ // "https://www.google.com/path/subpath?query#ref",
+ // "www.google.com/path/subpath?query#ref",
+ // "google.com/path/subpath?query#ref".
+ // Currently only supports tokenization of one url string at a time,
+ // i.e. the input string cannot have spaces in the middle, but can have
+ // leading or trailing spaces.
+ URL = 4;
}
}
optional TokenizerType.Code tokenizer_type = 2;
}
+// Describes how a document property should be indexed.
+// Next tag: 3
+message DocumentIndexingConfig {
+ // OPTIONAL: Whether nested properties within the document property should be
+ // indexed. If true, then all nested properties will be indexed according to
+ // the property's own indexing configurations. If false, nested documents'
+ // properties will not be indexed even if they have an indexing configuration.
+ //
+ // The default value is false.
+ optional bool index_nested_properties = 1;
+
+ // List of nested properties within the document to index. Only the
+ // provided list of properties will be indexed according to the property's
+ // indexing configurations.
+ //
+ // index_nested_properties must be false in order to use this feature.
+ repeated string indexable_nested_properties_list = 2;
+}
+
+// Describes how an int64 property should be indexed.
+// Next tag: 3
+message IntegerIndexingConfig {
+ // OPTIONAL: Indicates how the int64 contents of this property should be
+ // matched.
+ //
+ // The default value is UNKNOWN.
+ message NumericMatchType {
+ enum Code {
+ // Contents in this property will not be indexed. Useful if the int64
+ // property type is not indexable.
+ UNKNOWN = 0;
+
+ // Contents in this property should only be returned for queries matching
+ // the range.
+ RANGE = 1;
+ }
+ }
+ optional NumericMatchType.Code numeric_match_type = 1;
+}
+
+// Describes how a property can be used to join this document with another
+// document. See JoinSpecProto (in search.proto) for more details.
+// Next tag: 3
+message JoinableConfig {
+ // OPTIONAL: Indicates what joinable type the content value of this property
+ // is.
+ //
+ // The default value is NONE.
+ message ValueType {
+ enum Code {
+ // Value in this property is not joinable.
+ NONE = 0;
+
+ // Value in this property is a joinable (string) qualified id, which is
+ // composed of namespace and uri.
+ // See JoinSpecProto (in search.proto) and DocumentProto (in
+ // document.proto) for more details about qualified id, namespace and uri.
+ QUALIFIED_ID = 1;
+ }
+ }
+ optional ValueType.Code value_type = 1;
+
+ // If the parent document that a child document is joined to is deleted,
+ // delete the child document as well. This only applies to children joined
+ // through QUALIFIED_ID; other (future) joinable value types won't use it.
+ optional bool propagate_delete = 2 [default = false];
+}
+
// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
-// Next tag: 6
+// Next tag: 9
message PropertyConfigProto {
// REQUIRED: Name that uniquely identifies a property within an Document of
// a specific SchemaTypeConfigProto.
@@ -106,9 +222,10 @@ message PropertyConfigProto {
// REQUIRED: Physical data-types of the contents of the property.
message DataType {
enum Code {
- // This should never purposely be set. This is used for backwards
+ // This value should never purposely be used. This is used for backwards
// compatibility reasons.
UNKNOWN = 0;
+
STRING = 1;
INT64 = 2;
DOUBLE = 3;
@@ -162,9 +279,26 @@ message PropertyConfigProto {
}
optional Cardinality.Code cardinality = 4;
- // OPTIONAL: Properties that do not set the indexing config will not be
- // indexed.
- optional IndexingConfig indexing_config = 5;
+ // OPTIONAL: Describes how string properties should be indexed. String
+ // properties that do not set the indexing config will not be indexed.
+ optional StringIndexingConfig string_indexing_config = 5;
+
+ // OPTIONAL: Describes how document properties should be indexed.
+ optional DocumentIndexingConfig document_indexing_config = 6;
+
+ // OPTIONAL: Describes how int64 properties should be indexed. Int64
+ // properties that do not set the indexing config will not be indexed.
+ optional IntegerIndexingConfig integer_indexing_config = 7;
+
+ // OPTIONAL: Describes how string properties can be used as a document join
+ // matcher.
+ //
+ // Note: currently we only support STRING single joining, so if a property is
+ // set as joinable (i.e. joinable_config.value_type is not NONE), then:
+ // - DataType should be STRING. Otherwise joinable_config will be ignored.
+ // - The property itself and any upper-level (nested doc) property should
+ // contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
+ optional JoinableConfig joinable_config = 8;
}
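Pulling the pieces above together, a single property definition might look like the following sketch (standard proto2 C++ generated API assumed; the term_match_type field of StringIndexingConfig sits outside the hunks shown here and is assumed from context):

  #include "icing/proto/schema.pb.h"  // assumed generated header path
  #include "icing/proto/term.pb.h"

  icing::lib::PropertyConfigProto MakeSubjectProperty() {
    icing::lib::PropertyConfigProto prop;
    prop.set_property_name("subject");
    prop.set_data_type(icing::lib::PropertyConfigProto::DataType::STRING);
    prop.set_cardinality(icing::lib::PropertyConfigProto::Cardinality::OPTIONAL);
    auto* indexing = prop.mutable_string_indexing_config();
    indexing->set_term_match_type(icing::lib::TermMatchType::PREFIX);  // assumed
    indexing->set_tokenizer_type(
        icing::lib::StringIndexingConfig::TokenizerType::PLAIN);
    return prop;
  }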
// List of all supported types constitutes the schema used by Icing.
@@ -174,7 +308,7 @@ message SchemaProto {
}
// Result of a call to IcingSearchEngine.SetSchema
-// Next tag: 4
+// Next tag: 9
message SetSchemaResultProto {
// Status code can be one of:
// OK
@@ -198,6 +332,29 @@ message SetSchemaResultProto {
// documents that fail validation against the new schema types would also be
// deleted.
repeated string incompatible_schema_types = 3;
+
+ // Schema types that did not exist in the previous schema and were added by
+ // the new schema.
+ repeated string new_schema_types = 4;
+
+ // Schema types that were changed in a way that was backwards compatible and
+ // didn't invalidate the index.
+ repeated string fully_compatible_changed_schema_types = 5;
+
+ // Schema types that were changed in a way that was backwards compatible, but
+ // invalidated the index.
+ repeated string index_incompatible_changed_schema_types = 6;
+
+ // Overall time used for the function call.
+ optional int32 latency_ms = 7;
+
+ // Schema types that were changed in a way that was backwards compatible, but
+ // invalidated the joinable cache.
+ //
+ // For example, a property was set as non-joinable in the old schema
+ // definition, but changed to joinable in the new definition. In this case,
+ // this property will be considered join incompatible when setting the new
+ // schema.
+ repeated string join_incompatible_changed_schema_types = 8;
}
// Result of a call to IcingSearchEngine.GetSchema
diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto
index 667ff4f..a8040a1 100644
--- a/proto/icing/proto/scoring.proto
+++ b/proto/icing/proto/scoring.proto
@@ -16,14 +16,16 @@ syntax = "proto2";
package icing.lib;
+import "icing/proto/term.proto";
+
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Encapsulates the configurations on how Icing should score and rank the search
// results.
-// Next tag: 3
+// TODO(b/170347684): Change all timestamps to seconds.
+// Next tag: 12
message ScoringSpecProto {
// OPTIONAL: Indicates how the search results will be ranked.
message RankingStrategy {
@@ -37,6 +39,41 @@ message ScoringSpecProto {
// Ranked by document creation timestamps.
CREATION_TIMESTAMP = 2;
+
+ // The following ranking strategies are based on usage reporting. Please
+ // see usage.proto for more information. If one of the usage ranking
+ // strategies is used but none of the result documents has reported usage,
+ // the documents will be returned in the default reverse insertion order.
+
+ // Ranked by count of reports with usage type 1.
+ USAGE_TYPE1_COUNT = 3;
+
+ // Ranked by count of reports with usage type 2.
+ USAGE_TYPE2_COUNT = 4;
+
+ // Ranked by count of reports with usage type 3.
+ USAGE_TYPE3_COUNT = 5;
+
+ // Ranked by last used timestamp with usage type 1. The timestamps are
+ // compared in seconds.
+ USAGE_TYPE1_LAST_USED_TIMESTAMP = 6;
+
+ // Ranked by last used timestamp with usage type 2. The timestamps are
+ // compared in seconds.
+ USAGE_TYPE2_LAST_USED_TIMESTAMP = 7;
+
+ // Ranked by last used timestamp with usage type 3. The timestamps are
+ // compared in seconds.
+ USAGE_TYPE3_LAST_USED_TIMESTAMP = 8;
+
+ // Ranked by relevance score, currently computed as BM25F score.
+ RELEVANCE_SCORE = 9;
+
+ // Ranked by the aggregated score of the joined documents.
+ JOIN_AGGREGATE_SCORE = 10;
+
+ // Ranked by the advanced scoring expression provided.
+ ADVANCED_SCORING_EXPRESSION = 11;
}
}
optional RankingStrategy.Code rank_by = 1;
@@ -54,4 +91,78 @@ message ScoringSpecProto {
}
}
optional Order.Code order_by = 2;
+
+ // OPTIONAL: Specifies property weights for RELEVANCE_SCORE scoring strategy.
+ // Property weights are used for promoting or demoting query term matches in a
+ // document property. When property weights are provided, the term frequency
+ // is multiplied by the normalized property weight when computing the
+ // normalized term frequency component of BM25F. To prefer query term matches
+ // in the "subject" property over the "body" property of "Email" documents,
+ // set a higher property weight value for "subject" than "body". By default,
+ // all properties that are not specified are given a raw, pre-normalized
+ // weight of 1.0 when scoring.
+ repeated TypePropertyWeights type_property_weights = 3;
+
+ // OPTIONAL: Specifies the scoring expression for ADVANCED_SCORING_EXPRESSION
+ // RankingStrategy.
+ optional string advanced_scoring_expression = 4;
+}
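Following the "subject" over "body" example in the comment above, a RELEVANCE_SCORE spec with property weights could be built like this sketch (standard proto2 C++ generated API assumed; "Email" is a hypothetical type):

  #include "icing/proto/scoring.pb.h"  // assumed generated header path

  icing::lib::ScoringSpecProto MakeEmailScoringSpec() {
    icing::lib::ScoringSpecProto spec;
    spec.set_rank_by(
        icing::lib::ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
    auto* weights = spec.add_type_property_weights();
    weights->set_schema_type("Email");
    auto* subject = weights->add_property_weights();
    subject->set_path("subject");
    subject->set_weight(2.0);  // unspecified properties keep the default 1.0
    return spec;
  }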
+
+// Next tag: 3
+message SuggestionScoringSpecProto {
+ message SuggestionRankingStrategy {
+ enum Code {
+ // No ranking strategy specified; terms may be returned in an arbitrary
+ // order.
+ NONE = 0;
+
+ // Ranked by the term's hit count.
+ DOCUMENT_COUNT = 1;
+
+ // Ranked by the term's frequency.
+ TERM_FREQUENCY = 2;
+ }
+ }
+
+ // TermMatchType.Code=UNKNOWN
+ // Should never purposely be set and may lead to undefined behavior. This is
+ // used for backwards compatibility reasons.
+ //
+ // TermMatchType.Code=EXACT_ONLY
+ // Only exact hits will be counted to score a suggestion term.
+ //
+ // TermMatchType.Code=PREFIX
+ // Both exact hits and prefix hits will be counted to score a suggestion
+ // term.
+ optional TermMatchType.Code scoring_match_type = 1;
+
+ // Rank the output suggested results by the given SuggestionRankingStrategy.
+ optional SuggestionRankingStrategy.Code rank_by = 2;
+}
+
+// Next tag: 3
+message TypePropertyWeights {
+ // Schema type to apply property weights to.
+ optional string schema_type = 1;
+
+ // Property weights to apply to the schema type.
+ repeated PropertyWeight property_weights = 2;
+}
+
+// Next tag: 3
+message PropertyWeight {
+ // Property path to assign property weight to. Property paths must be composed
+ // only of property names and property separators (the '.' character).
+ // For example, if an "Email" schema type has string property "subject" and
+ // document property "sender", which has string property "name", the property
+ // path for the email's subject would just be "subject" and the property path
+ // for the sender's name would be "sender.name". If an invalid path is
+ // specified, the property weight is discarded.
+ optional string path = 1;
+
+ // Property weight, valid values are positive and zero. Setting a zero
+ // property weight will remove scoring contribution for a query term match in
+ // the property. Negative weights are invalid and will result in an error.
+ // By default, a property is given a raw, pre-normalized weight of 1.0.
+ optional double weight = 2;
}
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 8ea5036..7f4fb3e 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -17,6 +17,8 @@ syntax = "proto2";
package icing.lib;
import "icing/proto/document.proto";
+import "icing/proto/logging.proto";
+import "icing/proto/scoring.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";
@@ -25,7 +27,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Client-supplied specifications on what documents to retrieve.
-// Next tag: 5
+// Next tag: 11
message SearchSpecProto {
// REQUIRED: The "raw" query string that users may type. For example, "cat"
// will search for documents with the term cat in them.
@@ -60,11 +62,61 @@ message SearchSpecProto {
// applies to the entire 'query'. To issue different queries for different
// schema types, separate Search()'s will need to be made.
repeated string schema_type_filters = 4;
+
+ // Timestamp taken just before sending proto across the JNI boundary from java
+ // to native side.
+ optional int64 java_to_native_start_timestamp_ms = 5;
+
+ message SearchType {
+ enum Code {
+ UNDEFINED = 0;
+ ICING_RAW_QUERY = 1;
+ EXPERIMENTAL_ICING_ADVANCED_QUERY = 2;
+ }
+ }
+ // This field determines which type of query parsing Icing will use to fulfill
+ // the query.
+ // ICING_RAW_QUERY is the current query language as released, which supports
+ // basic ands, ors and nots as well as grouping and property restricts.
+ // EXPERIMENTAL_ICING_ADVANCED_QUERY is a superset of ICING_RAW_QUERY that
+ // will also support the use of functions defined by Icing Lib.
+ // This field is only temporary. When fully complete, all queries will be
+ // parsed by EXPERIMENTAL_ICING_ADVANCED_QUERY. This field only exists to
+ // enable testing.
+ // TODO(b/208654892) Remove this field once EXPERIMENTAL_ICING_ADVANCED_QUERY
+ // is fully supported.
+ optional SearchType.Code search_type = 6
+ [default = EXPERIMENTAL_ICING_ADVANCED_QUERY];
+
+ // OPTIONAL: If this field is present, join documents based on a nested
+ // SearchSpec.
+ optional JoinSpecProto join_spec = 7;
+
+ // Features enabled in this search spec.
+ repeated string enabled_features = 8;
+
+ // OPTIONAL: Whether to use the read-only implementation of
+ // IcingSearchEngine::Search.
+ // The read-only version enables multiple queries to be performed concurrently
+ // as it only acquires the read lock at IcingSearchEngine's level.
+ // Finer-grained locks are implemented around code paths that write changes to
+ // Icing during Search.
+ optional bool use_read_only_search = 9 [default = true];
+
+ // TODO(b/294266822): Handle multiple property filter lists for same schema
+ // type.
+ // How to specify a subset of properties to be searched. If no type property
+ // filter has been specified for a schema type (no TypePropertyMask for the
+ // given schema type), then *all* properties of that schema type will be
+ // searched. If an empty property filter is specified for a given schema type
+ // (TypePropertyMask for the given schema type has empty paths field), no
+ // properties of that schema type will be searched.
+ repeated TypePropertyMask type_property_filters = 10;
}
// Client-supplied specifications on what to include/how to format the search
// results.
-// Next tag: 4
+// Next tag: 10
message ResultSpecProto {
// The results will be returned in pages, and num_per_page specifies the
// number of documents in one page.
@@ -84,46 +136,161 @@ message ResultSpecProto {
// have snippet information provided. If set to 0, snippeting is disabled.
optional int32 num_matches_per_property = 2;
- // How large of a window to provide. Windows start at max_window_bytes / 2
- // bytes before the middle of the matching token and end at max_window_bytes
- // / 2 bytes after the middle of the matching token. Windowing respects
- // token boundaries.
- // Therefore, the returned window may be smaller than requested. Setting
- // max_window_bytes to 0 will disable windowing information. If matches
- // enabled is also set to false, then snippeting is disabled.
- // Ex. max_window_bytes = 16. "foo bar baz bat rat" with a query of "baz"
+ // How large of a window to provide. Windows start at
+ // max_window_utf32_length / 2 bytes before the middle of the matching token
+ // and end at max_window_utf32_length / 2 bytes after the middle of the
+ // matching token. Windowing respects token boundaries. Therefore, the
+ // returned window may be smaller than requested. Setting
+ // max_window_utf32_length to 0 will disable windowing information. If
+ // matches enabled is also set to false, then snippeting is disabled. Ex.
+ // max_window_utf32_length = 16. "foo bar baz bat rat" with a query of "baz"
// will return a window of "bar baz bat" which is only 11 bytes long.
- optional int32 max_window_bytes = 3;
+ optional int32 max_window_utf32_length = 3;
}
optional SnippetSpecProto snippet_spec = 3;
+
+ // How to specify a subset of properties to retrieve. If no type property mask
+ // has been specified for a schema type, then *all* properties of that schema
+ // type will be retrieved.
+ repeated TypePropertyMask type_property_masks = 4;
+
+ // Groupings of namespaces and schema types whose total returned results
+ // should be limited together.
+ // Next tag: 3
+ message ResultGrouping {
+ // Grouping of namespace and schema type.
+ // Next tag: 3
+ message Entry {
+ // The namespace in this grouping that should be returned.
+ // This field should be empty if ResultGroupingType is SCHEMA_TYPE
+ optional string namespace = 1;
+
+ // The schema in this grouping that should be returned.
+ // This field should be empty if ResultGroupingType is NAMESPACE
+ optional string schema = 2;
+ }
+
+ // Identifier for namespace and schema type pairs.
+ repeated Entry entry_groupings = 1;
+
+ // The maximum number of results in this grouping that should be returned.
+ optional int32 max_results = 2;
+ }
+
+ // How to limit the number of results returned per set of namespaces or schema
+ // type. If results match for a namespace or schema type that is not present
+ // in any result groupings, then those results will be returned without limit.
+ //
+ // Non-existent namespaces and/or schema type will be ignored.
+ //
+ // Example: Suppose that there are four namespaces, each with three results
+ // matching the query for "foo". Without any result groupings, Icing would
+ // return the following results:
+ // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
+ // "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc1"].
+ //
+ // The following result groupings will be returned if the
+ // ResultGroupingType is set to NAMESPACE:
+ // [ { [ {"namespace0"} ], 2 }, { [ {"namespace1"}, {"namespace2"} ], 2} ]
+ //
+ // The following results will be returned:
+ // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
+ // "ns3doc2"].
+ repeated ResultGrouping result_groupings = 5;
+
+ // The threshold of total bytes of all documents at which to cut off a page,
+ // in order to limit the number of bytes in a single page.
+ // Note that it doesn't guarantee that the result's byte count will be
+ // smaller than, equal to, or larger than the threshold. It is just a cutoff,
+ // and only guarantees that the total bytes of the search results will exceed
+ // the threshold by less than the size of the final search result.
+ optional int32 num_total_bytes_per_page_threshold = 6
+ [default = 2147483647]; // INT_MAX
+
+ // The value by which the search results will be grouped.
+ // Can get grouped by schema type, namespace (default), or by namespace and
+ // schema type.
+ enum ResultGroupingType {
+ NONE = 0;
+ SCHEMA_TYPE = 1;
+ NAMESPACE = 2;
+ NAMESPACE_AND_SCHEMA_TYPE = 3;
+ }
+ optional ResultGroupingType result_group_type = 7;
+
+ // The max number of child documents that will be attached and returned in
+ // the result for each parent. It is only used for the join API.
+ optional int32 max_joined_children_per_parent_to_return = 8;
+
+ // The max number of results to be scored and ranked.
+ // Running time of ScoringProcessor and Ranker is O(num_to_score) according to
+ // results of //icing/scoring:score-and-rank_benchmark. Note that
+ // the process includes scoring, building a heap, and popping results from the
+ // heap.
+ //
+ // 30000 results can be scored and ranked within 3 ms on a Pixel 3 XL
+ // according to results of
+ // //icing/scoring:score-and-rank_benchmark, so set it as the
+ // default value.
+ optional int32 num_to_score = 9 [default = 30000];
}
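The namespace example above maps onto the proto like this sketch (standard proto2 C++ generated API assumed; note protoc renames the 'namespace' field to set_namespace_() in C++):

  #include "icing/proto/search.pb.h"  // assumed generated header path

  // Caps "namespace0" at 2 results and "namespace1"/"namespace2" at 2 combined.
  icing::lib::ResultSpecProto MakeGroupedResultSpec() {
    icing::lib::ResultSpecProto result_spec;
    result_spec.set_result_group_type(icing::lib::ResultSpecProto::NAMESPACE);

    auto* group0 = result_spec.add_result_groupings();
    group0->add_entry_groupings()->set_namespace_("namespace0");
    group0->set_max_results(2);

    auto* group1 = result_spec.add_result_groupings();
    group1->add_entry_groupings()->set_namespace_("namespace1");
    group1->add_entry_groupings()->set_namespace_("namespace2");
    group1->set_max_results(2);
    return result_spec;
  }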
// The representation of a single match within a DocumentProto property.
-// Next tag: 6
+//
+// Example: A document whose content is "Necesito comprar comida mañana." and a
+// query for "mana" with window=15
+// Next tag: 12
message SnippetMatchProto {
- // Properties may have multiple values. values_index indicates which of these
- // multiple string values the match occurred in. For properties with only one
- // value, the values_index will always be 0.
- // Ex. "Recipients" [
- // { { "Name" : "Daffy Duck" }
- // { "EmailAddress" : "daffduck@gmail.com" } },
- // { { "Name" : "Donald Duck" }
- // { "EmailAddress" : "donduck@gmail.com" } }
- // "Daffy Duck" is the string value with a value_index of 0 for property
- // "Recipients.Name". "Donald Duck" is the string value with a value_index of
- // 1 for property "Recipients.Name".
- optional int32 values_index = 1;
-
- // The position and length within the matched string at which the exact
- // match begins.
- optional int32 exact_match_position = 2;
-
- optional int32 exact_match_bytes = 3;
-
- // The position and length of the suggested snippet window.
- optional int32 window_position = 4;
-
- optional int32 window_bytes = 5;
+ // The index of the byte in the string at which the match begins and the
+ // length in bytes of the match.
+ //
+ // For the example above, the values of these fields would be
+ // exact_match_byte_position=24, exact_match_byte_length=7 "mañana"
+ optional int32 exact_match_byte_position = 2;
+ optional int32 exact_match_byte_length = 3;
+
+ // The length in bytes of the subterm that matches the query. The beginning of
+ // the submatch is the same as exact_match_byte_position.
+ //
+ // For the example above, the value of this field would be 5. With
+ // exact_match_byte_position=24 above, it would produce the substring "maña"
+ optional int32 submatch_byte_length = 10;
+
+ // The index of the UTF-16 code unit in the string at which the match begins
+ // and the length in UTF-16 code units of the match. This is for use with
+ // UTF-16 encoded strings like Java.lang.String.
+ //
+ // For the example above, the values of these fields would be
+ // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana"
+ optional int32 exact_match_utf16_position = 6;
+ optional int32 exact_match_utf16_length = 7;
+
+ // The length in UTF-16 code units of the subterm that matches the query. The
+ // beginning of the submatch is the same as exact_match_utf16_position. This
+ // is for use with UTF-16 encoded strings like Java.lang.String.
+ //
+ // For the example above, the value of this field would be 4. With
+ // exact_match_utf16_position=24 above, it would produce the substring "maña"
+ optional int32 submatch_utf16_length = 11;
+
+ // The index of the byte in the string at which the suggested snippet window
+ // begins and the length in bytes of the window.
+ //
+ // For the example above, the values of these fields would be
+ // window_byte_position=17, window_byte_length=15 "comida mañana."
+ optional int32 window_byte_position = 4;
+ optional int32 window_byte_length = 5;
+
+ // The index of the UTF-16 code unit in the string at which the suggested
+ // snippet window begins and the length in UTF-16 code units of the window.
+ // This is for use with UTF-16 encoded strings like Java.lang.String.
+ //
+ // For the example above, the values of these fields would be
+ // window_utf16_position=17, window_utf16_length=14 "comida mañana."
+ optional int32 window_utf16_position = 8;
+ optional int32 window_utf16_length = 9;
+
+ reserved 1;
}
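Given the byte-oriented fields above, extracting the match and window from the raw UTF-8 content is a pair of substr calls. For the "mañana" example this returns {"mañana", "comida mañana."} (a sketch; generated header path assumed):

  #include <string>
  #include <utility>

  #include "icing/proto/search.pb.h"  // assumed generated header path

  std::pair<std::string, std::string> ExtractMatchAndWindow(
      const std::string& content, const icing::lib::SnippetMatchProto& match) {
    std::string exact = content.substr(match.exact_match_byte_position(),
                                       match.exact_match_byte_length());
    std::string window = content.substr(match.window_byte_position(),
                                        match.window_byte_length());
    return {exact, window};
  }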
// A Proto representing all snippets for a single DocumentProto.
@@ -133,9 +300,29 @@ message SnippetProto {
// property values in the corresponding DocumentProto.
// Next tag: 3
message EntryProto {
- // A '.'-delimited sequence of property names indicating which property in
- // the DocumentProto these snippets correspond to.
- // Example properties: 'body', 'sender.name', 'sender.emailaddress', etc.
+ // A property path indicating which property in the DocumentProto these
+ // snippets correspond to. Property paths will contain 1) property names,
+ // 2) the property separator character '.' used to represent nested property
+ // and 3) indices surrounded by brackets to represent a specific value in
+ // that property.
+ //
+ // Example properties:
+ // - 'body' : the first and only string value of a top-level
+ // property called 'body'.
+ // - 'sender.name' : the first and only string value of a property
+ // called 'name' that is a subproperty of a
+ // property called 'sender'.
+ // - 'bcc[1].emailaddress': the first and only string value of a property
+ // called 'emailaddress' that is a subproperty of
+ // the second document value of a property called
+ // 'bcc'.
+ // - 'attachments[0]' : the first (of more than one) string value of a
+ // property called 'attachments'.
+ // NOTE: If there is only a single value for a property (like
+ // 'sender.name'), then no value index will be added to the property path.
+ // An index of [0] is implied. If there is more than one value for a
+ // property, then the value index will be added to the property path (like
+ // 'attachments[0]').
optional string property_name = 1;
repeated SnippetMatchProto snippet_matches = 2;
@@ -145,7 +332,7 @@ message SnippetProto {
}
// Icing lib-supplied results from a search request.
-// Next tag: 5
+// Next tag: 6
message SearchResultProto {
// Status code can be one of:
// OK
@@ -161,7 +348,7 @@ message SearchResultProto {
optional StatusProto status = 1;
// The Results that matched the query. Empty if there was an error.
- // Next tag: 3
+ // Next tag: 5
message ResultProto {
// Document that matches the SearchSpecProto.
optional DocumentProto document = 1;
@@ -169,30 +356,203 @@ message SearchResultProto {
// Snippeting information for the document if requested in the
// ResultSpecProto. A default instance, if not requested.
optional SnippetProto snippet = 2;
+
+ // The score that the document was ranked by. The meaning of this score is
+ // determined by ScoringSpecProto.rank_by.
+ optional double score = 3;
+
+ // The child documents that were joined to a parent document.
+ repeated ResultProto joined_results = 4;
}
repeated ResultProto results = 2;
// Various debug fields. Not populated if ResultSpecProto.debug_info = false.
+ // Next tag: 4
message DebugInfoProto {
- // The number of results that actually matched the SearchSpecProto. This is
- // different from the number of `documents` returned since the user can
- // set a ResultSpecProto.limit on how many results are returned to them.
- optional uint64 num_results = 1;
-
- // Latency to parse and execute the query, in milliseconds.
- optional uint64 latency_ms = 2;
-
// The internal representation of the actual query string that was executed.
// This may be different from the SearchSpecProto.query if the original
// query was malformed.
optional string executed_query = 3;
+
+ reserved 1, 2;
}
optional DebugInfoProto debug_info = 3;
// An opaque token used internally to keep track of information needed for
// pagination. A valid pagination token is required to fetch other pages of
- // results. The default value 0 means that there're no more pages.
+ // results. A value of 0 means that there are no more pages.
// LINT.IfChange(next_page_token)
- optional uint64 next_page_token = 4 [default = 0];
+ optional uint64 next_page_token = 4;
// LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)
+
+ // Stats for query execution performance.
+ optional QueryStatsProto query_stats = 5;
+}
+
+// Next tag: 3
+message TypePropertyMask {
+ // The schema type to which these property masks should apply.
+ // If the schema type is the wildcard ("*"), then the type property masks
+ // will apply to all results of types that don't have their own, specific
+ // type property mask entry.
+ optional string schema_type = 1;
+
+ // The property masks specifying the property to be retrieved. Property
+ // masks must be composed only of property names, property separators (the
+ // '.' character). For example, "subject", "recipients.name". Specifying no
+ // property masks will result in *no* properties being retrieved.
+ repeated string paths = 2;
+}
+
+// Next tag: 2
+message GetResultSpecProto {
+ // How to specify a subset of properties to retrieve. If no type property mask
+ // has been specified for a schema type, then *all* properties of that schema
+ // type will be retrieved.
+ repeated TypePropertyMask type_property_masks = 1;
+}
+
+// Next tag: 8
+message SuggestionSpecProto {
+ // REQUIRED: The "raw" prefix string that users may type. For example, "f"
+ // will search for suggested queries that start with "f", like "foo" or
+ // "fool".
+ optional string prefix = 1;
+
+ // OPTIONAL: Only search for suggestions under the specified namespaces.
+ // If unset, the suggestion will search over all namespaces. Note that this
+ // applies to the entire 'prefix'. To issue different suggestions for
+ // different namespaces, separate RunSuggestion()'s will need to be made.
+ repeated string namespace_filters = 2;
+
+ // REQUIRED: The number of suggestions to be returned.
+ optional int32 num_to_return = 3;
+
+ // Indicates how the suggestion terms should be scored and ranked.
+ optional SuggestionScoringSpecProto scoring_spec = 4;
+
+ // OPTIONAL: Only search for suggestions under the specified
+ // DocumentUris. If unset, the suggestion will search over all Documents.
+ //
+ // Each namespace in the given NamespaceDocumentUriGroups should match the
+ // namespace_filters, i.e. it appears in namespace_filters or
+ // namespace_filters is empty.
+ //
+ // No given NamespaceDocumentUriGroup may have an empty document_uris list;
+ // use the namespace_filters to exclude a namespace instead.
+ //
+ // Note that this applies to the entire 'prefix'. To issue different
+ // suggestions for different DocumentIds, separate RunSuggestion()'s will need
+ // to be made.
+ repeated NamespaceDocumentUriGroup document_uri_filters = 5;
+
+ // OPTIONAL: Only search for suggestions under the specified schemas.
+ // If unset, the suggestion will search over all schema types. Note that this
+ // applies to the entire 'prefix'. To issue different suggestions for
+ // different schema types, separate RunSuggestion()'s will need to be made.
+ repeated string schema_type_filters = 6;
+
+ // OPTIONAL: Only search for suggestions under the specified types and
+ // properties.
+ //
+ // If unset, the suggestion will search over all types.
+ // If the TypePropertyMask.paths is unset, the suggestion will search over all
+ // properties under the TypePropertyMask.schema_type.
+ //
+ // Note that this applies to the entire 'prefix'. To issue different
+ // suggestions for different types, separate RunSuggestion()'s will need to be
+ // made.
+ repeated TypePropertyMask type_property_filters = 7;
+}
+
+// A group that holds namespace and document_uris under it.
+message NamespaceDocumentUriGroup {
+ optional string namespace_ = 1;
+ repeated string document_uris = 2;
+}
+
+// Next tag: 3
+message SuggestionResponse {
+ message Suggestion {
+ // The suggested query string for client to search for.
+ optional string query = 1;
+ }
+
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION
+ // INTERNAL
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+
+ repeated Suggestion suggestions = 2;
+}
+
+// Specification for a left outer join.
+//
+// Next tag: 7
+message JoinSpecProto {
+ // Collection of several specs that will be used for searching and joining
+ // child documents.
+ //
+ // Next tag: 4
+ message NestedSpecProto {
+ // A nested SearchSpec that will be used to retrieve child documents. If you
+ // are only looking to join on a specific type of document, you could set a
+ // schema filter in this SearchSpec. This includes the nested search query.
+ // See SearchSpecProto.
+ optional SearchSpecProto search_spec = 1;
+
+ // A nested ScoringSpec that will be used to score child documents.
+ // See ScoringSpecProto.
+ optional ScoringSpecProto scoring_spec = 2;
+
+ // A nested ResultSpec that will be used to format child documents in the
+ // result joined documents, e.g. snippeting, projection.
+ // See ResultSpecProto.
+ optional ResultSpecProto result_spec = 3;
+ }
+ optional NestedSpecProto nested_spec = 1;
+
+ // The equivalent of a primary key in SQL. This is an expression that will be
+ // used to match child documents from the nested search to this document. One
+ // such expression is qualifiedId(). When used, it means the contents of the
+ // child_property_expression property in the child documents must be equal to
+ // the parent document's qualified id.
+ // TODO(b/256022027) allow for parent_property_expression to be any property
+ // of the parent document.
+ optional string parent_property_expression = 2;
+
+ // The equivalent of a foreign key in SQL. This defines an equality
+ // constraint between a property in a child document and a property in the
+ // parent document. For example, if you want to join child documents which
+ // have an entityId property containing a fully qualified document id,
+ // child_property_expression can be set to "entityId".
+ // TODO(b/256022027) figure out how to allow this to refer to documents
+ // outside of same pkg+db+ns.
+ optional string child_property_expression = 3;
+
+ // The max number of child documents to join to a parent document.
+ // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
+ // control the number of children that are returned. There is no supported
+ // control for the number of children being scored at this time.
+ optional int32 max_joined_child_count = 4 [deprecated = true];
+
+ // The strategy by which to score the aggregation of child documents. For
+ // example, you might want to know which entity document has the most actions
+ // taken on it. If JOIN_AGGREGATE_SCORE is used in the base SearchSpecProto,
+ // the COUNT value will rank entity documents based on the number of child
+ // documents.
+ message AggregationScoringStrategy {
+ enum Code {
+     NONE = 0;  // No aggregation strategy for child documents; use the
+                // parent document's score.
+ COUNT = 1;
+ MIN = 2;
+ AVG = 3;
+ MAX = 4;
+ SUM = 5;
+ }
+ }
+ optional AggregationScoringStrategy.Code aggregation_scoring_strategy = 5;
}
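+
+// Example (a minimal C++ sketch of a qualified-id join, assuming the
+// protoc-generated API; the query and property names are hypothetical):
+//
+//   JoinSpecProto join_spec;
+//   join_spec.set_parent_property_expression("qualifiedId()");
+//   join_spec.set_child_property_expression("entityId");
+//   join_spec.mutable_nested_spec()->mutable_search_spec()->set_query(
+//       "actions");
+//   join_spec.set_aggregation_scoring_strategy(
+//       JoinSpecProto::AggregationScoringStrategy::COUNT);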
diff --git a/proto/icing/proto/status.proto b/proto/icing/proto/status.proto
index 2733a15..06ec6c4 100644
--- a/proto/icing/proto/status.proto
+++ b/proto/icing/proto/status.proto
@@ -24,7 +24,7 @@ option objc_class_prefix = "ICNG";
// Canonical status to indicate the results of API calls.
// Next tag: 3
message StatusProto {
- // Next tag: 9
+ // Next tag: 10
enum Code {
// A default for all other use-cases. Should never be used in practice. This
// may happen if there are backwards-compatibility issues.
@@ -36,6 +36,9 @@ message StatusProto {
// The IcingSearchEngine instance is still usable. But the schema and/or
// documents may need to be re-added to prevent future API calls from
  // failing or returning incorrect information.
+ //
+ // TODO(b/171750324): split into WARNING_PARTIAL_LOSS and
+ // WARNING_COMPLETE_LOSS.
WARNING_DATA_LOSS = 2;
// Parameters to API call are invalid and cannot be processed.
@@ -62,6 +65,12 @@ message StatusProto {
// make some space on the underlying filesystem.
OUT_OF_SPACE = 8;
+ // An operation is invalid because the resource already exists and can't be
+ // replaced. For example, this status is used when a SchemaProto contains
+ // multiple definitions of the same type or multiple properties with the
+ // same name within a type.
+ ALREADY_EXISTS = 9;
+
// Any future status codes.
}
optional Code code = 1;
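+
+ // Example (a minimal C++ sketch of checking for ALREADY_EXISTS, assuming an
+ // IcingSearchEngine instance `icing` and a SchemaProto `schema`):
+ //
+ //   SetSchemaResultProto result = icing.SetSchema(schema);
+ //   if (result.status().code() == StatusProto::ALREADY_EXISTS) {
+ //     // `schema` defined the same type or property more than once.
+ //   }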
diff --git a/proto/icing/proto/storage.proto b/proto/icing/proto/storage.proto
new file mode 100644
index 0000000..39dab6b
--- /dev/null
+++ b/proto/icing/proto/storage.proto
@@ -0,0 +1,187 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+import "icing/proto/status.proto";
+
+option java_package = "com.google.android.icing.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// Next tag: 10
+message NamespaceStorageInfoProto {
+ // Name of the namespace.
+ optional string namespace = 1;
+
+ // Number of alive documents in this namespace.
+ optional int32 num_alive_documents = 2;
+
+ // NOTE: We don't have stats on the number of deleted documents in a
+ // namespace because we completely erase all data on a document when it's
+ // deleted, so we can't tell which namespace it belonged to.
+
+ // Number of expired documents in this namespace.
+ optional int32 num_expired_documents = 3;
+
+ // LINT.IfChange(namespace_storage_info_usage_types)
+ // Number of alive documents that have a UsageReport.usage_type reported.
+ optional int32 num_alive_documents_usage_type1 = 4;
+ optional int32 num_alive_documents_usage_type2 = 5;
+ optional int32 num_alive_documents_usage_type3 = 6;
+
+ // Number of expired documents that have a UsageReport.usage_type reported.
+ optional int32 num_expired_documents_usage_type1 = 7;
+ optional int32 num_expired_documents_usage_type2 = 8;
+ optional int32 num_expired_documents_usage_type3 = 9;
+ // LINT.ThenChange()
+}
+
+// Next tag: 15
+message DocumentStorageInfoProto {
+ // Total number of alive documents.
+ optional int32 num_alive_documents = 1;
+
+ // Total number of deleted documents.
+ optional int32 num_deleted_documents = 2;
+
+ // Total number of expired documents.
+ optional int32 num_expired_documents = 3;
+
+ // Total size of the document store in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 document_store_size = 4;
+
+ // Total size of the ground truth in bytes. The ground truth may
+ // include deleted or expired documents. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 document_log_size = 5;
+
+ // Size of the key mapper in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 key_mapper_size = 6;
+
+ // Size of the document id mapper in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 document_id_mapper_size = 7;
+
+ // Size of the score cache in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 score_cache_size = 8;
+
+ // Size of the filter cache in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 filter_cache_size = 9;
+
+ // Size of the corpus mapper in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 corpus_mapper_size = 10;
+
+ // Size of the corpus score cache in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 corpus_score_cache_size = 11;
+
+ // Size of the namespace id mapper in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 namespace_id_mapper_size = 12;
+
+ // Number of namespaces seen from the current documents.
+ //
+ // TODO(cassiewang): This isn't technically needed anymore since clients can
+ // get this number from namespace_storage_info. Consider removing this.
+ optional int32 num_namespaces = 13;
+
+ // Storage information of each namespace.
+ repeated NamespaceStorageInfoProto namespace_storage_info = 14;
+}
+
+// Next tag: 5
+message SchemaStoreStorageInfoProto {
+ // Size of the schema store in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 schema_store_size = 1;
+
+ // Total number of schema types.
+ optional int32 num_schema_types = 2;
+
+ // Total number of sections across all types.
+ optional int32 num_total_sections = 3;
+
+ // Total number of schema types that have reached the section limit.
+ optional int32 num_schema_types_sections_exhausted = 4;
+}
+
+// Next tag: 9
+message IndexStorageInfoProto {
+ // Total size of the index in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 index_size = 1;
+
+ // Size of the lite index lexicon in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 lite_index_lexicon_size = 2;
+
+ // Size of the lite index hit buffer in bytes. Will be set to -1 if an IO
+ // error is encountered while calculating this field.
+ optional int64 lite_index_hit_buffer_size = 3;
+
+ // Size of the main index lexicon in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 main_index_lexicon_size = 4;
+
+ // Size of the main index storage in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 main_index_storage_size = 5;
+
+ // Size of one main index block in bytes.
+ optional int64 main_index_block_size = 6;
+
+ // Number of main index blocks.
+ optional int32 num_blocks = 7;
+
+ // Fraction of the main index blocks that are free, assuming allocated
+ // blocks are fully used.
+ optional float min_free_fraction = 8;
+}
+
+// Next tag: 5
+message StorageInfoProto {
+ // Total size of Icing's storage in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 total_storage_size = 1;
+
+ // Storage information of the document store.
+ optional DocumentStorageInfoProto document_storage_info = 2;
+
+ // Storage information of the schema store.
+ optional SchemaStoreStorageInfoProto schema_store_storage_info = 3;
+
+ // Storage information of the index.
+ optional IndexStorageInfoProto index_storage_info = 4;
+}
+
+// Next tag: 3
+message StorageInfoResultProto {
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+
+ // Storage information of Icing.
+ optional StorageInfoProto storage_info = 2;
+}
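+
+// Example (a minimal C++ sketch, assuming an IcingSearchEngine instance
+// `icing` whose GetStorageInfo() returns this message):
+//
+//   StorageInfoResultProto result = icing.GetStorageInfo();
+//   if (result.status().code() == StatusProto::OK) {
+//     int64_t total_bytes = result.storage_info().total_storage_size();
+//     const DocumentStorageInfoProto& doc_info =
+//         result.storage_info().document_storage_info();
+//     // e.g. doc_info.num_alive_documents(), doc_info.document_store_size().
+//   }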
diff --git a/proto/icing/proto/usage.proto b/proto/icing/proto/usage.proto
new file mode 100644
index 0000000..eaa2671
--- /dev/null
+++ b/proto/icing/proto/usage.proto
@@ -0,0 +1,69 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+import "icing/proto/status.proto";
+
+option java_package = "com.google.android.icing.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// Representation of a usage report that is generated from the client and sent
+// to Icing.
+// Next tag: 5
+message UsageReport {
+ // Namespace of the document.
+ optional string document_namespace = 1;
+
+ // Uri of the document.
+ optional string document_uri = 2;
+
+ // Timestamp in milliseconds of when the usage occurred.
+ optional int64 usage_timestamp_ms = 3;
+
+ // LINT.IfChange
+ // Next tag: 3
+ enum UsageType {
+ // A custom usage type that clients can assign a meaning to. UsageReports of
+ // the same type are combined to provide usage counts that clients may use
+ // in scoring.
+ USAGE_TYPE1 = 0;
+
+ // Same as above.
+ USAGE_TYPE2 = 1;
+
+ // Same as above.
+ USAGE_TYPE3 = 2;
+ }
+ // LINT.ThenChange(
+ // //depot/google3/icing/store/usage-store.h:UsageScores,
+ // //depot/google3/icing/proto/\
+ // storage.proto:namespace_storage_info_usage_types)
+ optional UsageType usage_type = 4;
+}
+
+// Result of a call to IcingSearchEngine.ReportUsage
+// Next tag: 2
+message ReportUsageResultProto {
+ // Status code can be one of:
+ // OK
+ // NOT_FOUND
+ // INTERNAL
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+}
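+
+// Example (a minimal C++ sketch of reporting usage, assuming an
+// IcingSearchEngine instance `icing` whose ReportUsage() returns the result
+// above; the namespace/uri/timestamp values are hypothetical):
+//
+//   UsageReport report;
+//   report.set_document_namespace("email");
+//   report.set_document_uri("uri1");
+//   report.set_usage_timestamp_ms(1620000000000);
+//   report.set_usage_type(UsageReport::USAGE_TYPE1);
+//   ReportUsageResultProto result = icing.ReportUsage(report);
+//   // result.status().code() is NOT_FOUND if the document doesn't exist.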
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
new file mode 100644
index 0000000..dd08fd1
--- /dev/null
+++ b/synced_AOSP_CL_number.txt
@@ -0,0 +1 @@
+set(synced_AOSP_CL_number=587883838)